1 /* Function summary pass.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
64 #include "tree-streamer.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
86 #include "tree-into-ssa.h"
87 #include "symtab-clones.h"
90 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
91 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
92 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
94 /* Edge predicates goes here. */
95 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
100 ipa_dump_hints (FILE *f
, ipa_hints hints
)
104 fprintf (f
, "IPA hints:");
105 if (hints
& INLINE_HINT_indirect_call
)
107 hints
&= ~INLINE_HINT_indirect_call
;
108 fprintf (f
, " indirect_call");
110 if (hints
& INLINE_HINT_loop_iterations
)
112 hints
&= ~INLINE_HINT_loop_iterations
;
113 fprintf (f
, " loop_iterations");
115 if (hints
& INLINE_HINT_loop_stride
)
117 hints
&= ~INLINE_HINT_loop_stride
;
118 fprintf (f
, " loop_stride");
120 if (hints
& INLINE_HINT_same_scc
)
122 hints
&= ~INLINE_HINT_same_scc
;
123 fprintf (f
, " same_scc");
125 if (hints
& INLINE_HINT_in_scc
)
127 hints
&= ~INLINE_HINT_in_scc
;
128 fprintf (f
, " in_scc");
130 if (hints
& INLINE_HINT_cross_module
)
132 hints
&= ~INLINE_HINT_cross_module
;
133 fprintf (f
, " cross_module");
135 if (hints
& INLINE_HINT_declared_inline
)
137 hints
&= ~INLINE_HINT_declared_inline
;
138 fprintf (f
, " declared_inline");
140 if (hints
& INLINE_HINT_known_hot
)
142 hints
&= ~INLINE_HINT_known_hot
;
143 fprintf (f
, " known_hot");
145 if (hints
& INLINE_HINT_builtin_constant_p
)
147 hints
&= ~INLINE_HINT_builtin_constant_p
;
148 fprintf (f
, " builtin_constant_p");
154 /* Record SIZE and TIME to SUMMARY.
155 The accounted code will be executed when EXEC_PRED is true.
156 When NONCONST_PRED is false the code will evaluate to constant and
157 will get optimized out in specialized clones of the function.
158 If CALL is true account to call_size_time_table rather than
162 ipa_fn_summary::account_size_time (int size
, sreal time
,
163 const predicate
&exec_pred
,
164 const predicate
&nonconst_pred_in
,
170 predicate nonconst_pred
;
171 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
173 if (exec_pred
== false)
176 nonconst_pred
= nonconst_pred_in
& exec_pred
;
178 if (nonconst_pred
== false)
181 /* We need to create initial empty unconditional clause, but otherwise
182 we don't need to account empty times and sizes. */
183 if (!size
&& time
== 0 && table
->length ())
186 /* Only for calls we are unaccounting what we previously recorded. */
187 gcc_checking_assert (time
>= 0 || call
);
189 for (i
= 0; table
->iterate (i
, &e
); i
++)
190 if (e
->exec_predicate
== exec_pred
191 && e
->nonconst_predicate
== nonconst_pred
)
196 if (i
== max_size_time_table_size
)
201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
203 "\t\tReached limit on number of entries, "
204 "ignoring the predicate.");
206 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
209 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
210 ((double) size
) / ipa_fn_summary::size_scale
,
211 (time
.to_double ()), found
? "" : "new ");
212 exec_pred
.dump (dump_file
, conds
, 0);
213 if (exec_pred
!= nonconst_pred
)
215 fprintf (dump_file
, " nonconst:");
216 nonconst_pred
.dump (dump_file
, conds
);
219 fprintf (dump_file
, "\n");
223 class size_time_entry new_entry
;
224 new_entry
.size
= size
;
225 new_entry
.time
= time
;
226 new_entry
.exec_predicate
= exec_pred
;
227 new_entry
.nonconst_predicate
= nonconst_pred
;
229 call_size_time_table
.safe_push (new_entry
);
231 size_time_table
.safe_push (new_entry
);
237 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
238 /* Tolerate small roundoff issues. */
244 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
246 static struct cgraph_edge
*
247 redirect_to_unreachable (struct cgraph_edge
*e
)
249 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
250 struct cgraph_node
*target
= cgraph_node::get_create
251 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
254 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
256 e
= cgraph_edge::make_direct (e
, target
);
258 e
->redirect_callee (target
);
259 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
260 e
->inline_failed
= CIF_UNREACHABLE
;
261 e
->count
= profile_count::zero ();
262 es
->call_stmt_size
= 0;
263 es
->call_stmt_time
= 0;
265 callee
->remove_symbol_and_inline_clones ();
269 /* Set predicate for edge E. */
272 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
274 /* If the edge is determined to be never executed, redirect it
275 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
277 if (predicate
&& *predicate
== false
278 /* When handling speculative edges, we need to do the redirection
279 just once. Do it always on the direct edge, so we do not
280 attempt to resolve speculation while duplicating the edge. */
281 && (!e
->speculative
|| e
->callee
))
282 e
= redirect_to_unreachable (e
);
284 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
285 if (predicate
&& *predicate
!= true)
288 es
->predicate
= edge_predicate_pool
.allocate ();
289 *es
->predicate
= *predicate
;
294 edge_predicate_pool
.remove (es
->predicate
);
295 es
->predicate
= NULL
;
299 /* Set predicate for hint *P. */
302 set_hint_predicate (predicate
**p
, predicate new_predicate
)
304 if (new_predicate
== false || new_predicate
== true)
307 edge_predicate_pool
.remove (*p
);
313 *p
= edge_predicate_pool
.allocate ();
318 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
319 Otherwise add a new item to the vector with this predicate and frerq equal
320 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
321 in which case the function does nothing. */
324 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
325 const predicate
&new_predicate
, sreal add_freq
,
326 unsigned max_num_predicates
)
328 if (new_predicate
== false || new_predicate
== true)
330 ipa_freqcounting_predicate
*f
;
331 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
332 if (new_predicate
== f
->predicate
)
337 if (vec_safe_length (*v
) >= max_num_predicates
)
338 /* Too many different predicates to account for. */
341 ipa_freqcounting_predicate fcp
;
342 fcp
.predicate
= NULL
;
343 set_hint_predicate (&fcp
.predicate
, new_predicate
);
345 vec_safe_push (*v
, fcp
);
349 /* Compute what conditions may or may not hold given information about
350 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
351 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
352 copy when called in a given context. It is a bitmask of conditions. Bit
353 0 means that condition is known to be false, while bit 1 means that condition
354 may or may not be true. These differs - for example NOT_INLINED condition
355 is always false in the second and also builtin_constant_p tests cannot use
356 the fact that parameter is indeed a constant.
358 When INLINE_P is true, assume that we are inlining. AVAL contains known
359 information about argument values. The function does not modify its content
360 and so AVALs could also be of type ipa_call_arg_values but so far all
361 callers work with the auto version and so we avoid the conversion for
364 ERROR_MARK value of an argument means compile time invariant. */
367 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
369 ipa_auto_call_arg_values
*avals
,
370 clause_t
*ret_clause
,
371 clause_t
*ret_nonspec_clause
)
373 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
374 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
375 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
379 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
384 struct expr_eval_op
*op
;
386 /* We allow call stmt to have fewer arguments than the callee function
387 (especially for K&R style programs). So bound check here (we assume
388 m_known_aggs vector is either empty or has the same length as
390 gcc_checking_assert (!avals
->m_known_aggs
.length ()
391 || !avals
->m_known_vals
.length ()
392 || (avals
->m_known_vals
.length ()
393 == avals
->m_known_aggs
.length ()));
397 if (c
->code
== predicate::changed
399 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
402 if (ipa_agg_value_set
*agg
= avals
->safe_aggval_at (c
->operand_num
))
404 tree sval
= avals
->safe_sval_at (c
->operand_num
);
405 val
= ipa_find_agg_cst_for_param (agg
, sval
, c
->offset
,
413 val
= avals
->safe_sval_at (c
->operand_num
);
414 if (val
&& val
== error_mark_node
&& c
->code
!= predicate::changed
)
419 && (c
->code
== predicate::changed
420 || c
->code
== predicate::is_not_constant
))
422 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
423 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
426 if (c
->code
== predicate::changed
)
428 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
432 if (c
->code
== predicate::is_not_constant
)
434 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
438 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
440 if (c
->type
!= TREE_TYPE (val
))
441 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
442 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
447 val
= fold_unary (op
->code
, op
->type
, val
);
448 else if (!op
->val
[1])
449 val
= fold_binary (op
->code
, op
->type
,
450 op
->index
? op
->val
[0] : val
,
451 op
->index
? val
: op
->val
[0]);
452 else if (op
->index
== 0)
453 val
= fold_ternary (op
->code
, op
->type
,
454 val
, op
->val
[0], op
->val
[1]);
455 else if (op
->index
== 1)
456 val
= fold_ternary (op
->code
, op
->type
,
457 op
->val
[0], val
, op
->val
[1]);
458 else if (op
->index
== 2)
459 val
= fold_ternary (op
->code
, op
->type
,
460 op
->val
[0], op
->val
[1], val
);
466 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
469 if (res
&& integer_zerop (res
))
471 if (res
&& integer_onep (res
))
473 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
474 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
478 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
480 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
482 value_range vr
= avals
->m_known_value_ranges
[c
->operand_num
];
483 if (!vr
.undefined_p ()
485 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
487 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
490 range_fold_unary_expr (&res
, NOP_EXPR
,
491 c
->type
, &vr
, vr
.type ());
496 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
498 if (vr
.varying_p () || vr
.undefined_p ())
503 range_fold_unary_expr (&res
, op
->code
, op
->type
, &vr
, type
);
504 else if (!op
->val
[1])
506 value_range
op0 (op
->val
[0], op
->val
[0]);
507 range_fold_binary_expr (&res
, op
->code
, op
->type
,
508 op
->index
? &op0
: &vr
,
509 op
->index
? &vr
: &op0
);
516 if (!vr
.varying_p () && !vr
.undefined_p ())
519 value_range
val_vr (c
->val
, c
->val
);
520 range_fold_binary_expr (&res
, c
->code
, boolean_type_node
,
529 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
530 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
532 *ret_clause
= clause
;
533 if (ret_nonspec_clause
)
534 *ret_nonspec_clause
= nonspec_clause
;
537 /* Return true if VRP will be exectued on the function.
538 We do not want to anticipate optimizations that will not happen.
540 FIXME: This can be confused with -fdisable and debug counters and thus
541 it should not be used for correctness (only to make heuristics work).
542 This means that inliner should do its own optimizations of expressions
543 that it predicts to be constant so wrong code can not be triggered by
544 builtin_constant_p. */
547 vrp_will_run_p (struct cgraph_node
*node
)
549 return (opt_for_fn (node
->decl
, optimize
)
550 && !opt_for_fn (node
->decl
, optimize_debug
)
551 && opt_for_fn (node
->decl
, flag_tree_vrp
));
554 /* Similarly about FRE. */
557 fre_will_run_p (struct cgraph_node
*node
)
559 return (opt_for_fn (node
->decl
, optimize
)
560 && !opt_for_fn (node
->decl
, optimize_debug
)
561 && opt_for_fn (node
->decl
, flag_tree_fre
));
564 /* Work out what conditions might be true at invocation of E.
565 Compute costs for inlined edge if INLINE_P is true.
567 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
568 (if non-NULL) conditions evaluated for nonspecialized clone called
571 Vectors in AVALS will be populated with useful known information about
572 argument values - information not known to have any uses will be omitted -
573 except for m_known_contexts which will only be calculated if
574 COMPUTE_CONTEXTS is true. */
577 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
578 clause_t
*clause_ptr
,
579 clause_t
*nonspec_clause_ptr
,
580 ipa_auto_call_arg_values
*avals
,
581 bool compute_contexts
)
583 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
584 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
585 class ipa_edge_args
*args
;
588 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
590 if (ipa_node_params_sum
591 && !e
->call_stmt_cannot_inline_p
592 && (info
->conds
|| compute_contexts
)
593 && (args
= IPA_EDGE_REF (e
)) != NULL
)
595 struct cgraph_node
*caller
;
596 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
597 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
598 int i
, count
= ipa_get_cs_argument_count (args
);
602 if (e
->caller
->inlined_to
)
603 caller
= e
->caller
->inlined_to
;
606 caller_parms_info
= IPA_NODE_REF (caller
);
607 callee_pi
= IPA_NODE_REF (callee
);
609 /* Watch for thunks. */
611 /* Watch for variadic functions. */
612 count
= MIN (count
, ipa_get_param_count (callee_pi
));
616 for (i
= 0; i
< count
; i
++)
618 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
620 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
621 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
623 /* Determine if we know constant value of the parameter. */
624 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
625 ipa_get_type (callee_pi
, i
));
627 if (!cst
&& e
->call_stmt
628 && i
< (int)gimple_call_num_args (e
->call_stmt
))
630 cst
= gimple_call_arg (e
->call_stmt
, i
);
631 if (!is_gimple_min_invariant (cst
))
636 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
637 if (!avals
->m_known_vals
.length ())
638 avals
->m_known_vals
.safe_grow_cleared (count
, true);
639 avals
->m_known_vals
[i
] = cst
;
641 else if (inline_p
&& !es
->param
[i
].change_prob
)
643 if (!avals
->m_known_vals
.length ())
644 avals
->m_known_vals
.safe_grow_cleared (count
, true);
645 avals
->m_known_vals
[i
] = error_mark_node
;
648 /* If we failed to get simple constant, try value range. */
649 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
650 && vrp_will_run_p (caller
)
651 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
654 = ipa_value_range_from_jfunc (caller_parms_info
, e
, jf
,
655 ipa_get_type (callee_pi
,
657 if (!vr
.undefined_p () && !vr
.varying_p ())
659 if (!avals
->m_known_value_ranges
.length ())
661 avals
->m_known_value_ranges
.safe_grow (count
, true);
662 for (int i
= 0; i
< count
; ++i
)
663 new (&avals
->m_known_value_ranges
[i
])
666 avals
->m_known_value_ranges
[i
] = vr
;
670 /* Determine known aggregate values. */
671 if (fre_will_run_p (caller
))
673 ipa_agg_value_set agg
674 = ipa_agg_value_set_from_jfunc (caller_parms_info
,
676 if (agg
.items
.length ())
678 if (!avals
->m_known_aggs
.length ())
679 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
680 avals
->m_known_aggs
[i
] = agg
;
685 /* For calls used in polymorphic calls we further determine
686 polymorphic call context. */
688 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
690 ipa_polymorphic_call_context
691 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
692 if (!ctx
.useless_p ())
694 if (!avals
->m_known_contexts
.length ())
695 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
696 avals
->m_known_contexts
[i
]
697 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
702 gcc_assert (!count
|| callee
->thunk
);
704 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
706 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
708 for (i
= 0; i
< count
; i
++)
710 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
711 if (!is_gimple_min_invariant (cst
))
715 if (!avals
->m_known_vals
.length ())
716 avals
->m_known_vals
.safe_grow_cleared (count
, true);
717 avals
->m_known_vals
[i
] = cst
;
722 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
727 /* Allocate the function summary. */
730 ipa_fn_summary_alloc (void)
732 gcc_checking_assert (!ipa_fn_summaries
);
733 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
734 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
735 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
738 ipa_call_summary::~ipa_call_summary ()
741 edge_predicate_pool
.remove (predicate
);
746 ipa_fn_summary::~ipa_fn_summary ()
748 unsigned len
= vec_safe_length (loop_iterations
);
749 for (unsigned i
= 0; i
< len
; i
++)
750 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
751 len
= vec_safe_length (loop_strides
);
752 for (unsigned i
= 0; i
< len
; i
++)
753 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
755 call_size_time_table
.release ();
756 vec_free (loop_iterations
);
757 vec_free (loop_strides
);
758 builtin_constant_p_parms
.release ();
762 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
765 for (e
= node
->callees
; e
; e
= e
->next_callee
)
766 ipa_call_summaries
->remove (e
);
767 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
768 ipa_call_summaries
->remove (e
);
771 /* Duplicate predicates in loop hint vector, allocating memory for them and
772 remove and deallocate any uninteresting (true or false) ones. Return the
775 static vec
<ipa_freqcounting_predicate
, va_gc
> *
776 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
777 clause_t possible_truths
)
779 if (vec_safe_length (v
) == 0)
782 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
783 int len
= res
->length();
784 for (int i
= len
- 1; i
>= 0; i
--)
786 predicate new_predicate
787 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
788 /* We do not want to free previous predicate; it is used by node
790 (*res
)[i
].predicate
= NULL
;
791 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
793 if (!(*res
)[i
].predicate
)
794 res
->unordered_remove (i
);
801 /* Hook that is called by cgraph.c when a node is duplicated. */
803 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
805 ipa_fn_summary
*src_info
,
806 ipa_fn_summary
*info
)
808 new (info
) ipa_fn_summary (*src_info
);
809 /* TODO: as an optimization, we may avoid copying conditions
810 that are known to be false or true. */
811 info
->conds
= vec_safe_copy (info
->conds
);
813 clone_info
*cinfo
= clone_info::get (dst
);
814 /* When there are any replacements in the function body, see if we can figure
815 out that something was optimized out. */
816 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
818 /* Use SRC parm info since it may not be copied yet. */
819 class ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
820 ipa_auto_call_arg_values avals
;
821 int count
= ipa_get_param_count (parms_info
);
823 clause_t possible_truths
;
824 predicate true_pred
= true;
826 int optimized_out_size
= 0;
827 bool inlined_to_p
= false;
828 struct cgraph_edge
*edge
, *next
;
830 info
->size_time_table
.release ();
831 avals
.m_known_vals
.safe_grow_cleared (count
, true);
832 for (i
= 0; i
< count
; i
++)
834 struct ipa_replace_map
*r
;
836 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
838 if (r
->parm_num
== i
)
840 avals
.m_known_vals
[i
] = r
->new_tree
;
845 evaluate_conditions_for_known_args (dst
, false,
848 /* We are going to specialize,
849 so ignore nonspec truths. */
852 info
->account_size_time (0, 0, true_pred
, true_pred
);
854 /* Remap size_time vectors.
855 Simplify the predicate by pruning out alternatives that are known
857 TODO: as on optimization, we can also eliminate conditions known
859 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
861 predicate new_exec_pred
;
862 predicate new_nonconst_pred
;
863 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
865 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
867 if (new_exec_pred
== false || new_nonconst_pred
== false)
868 optimized_out_size
+= e
->size
;
870 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
874 /* Remap edge predicates with the same simplification as above.
875 Also copy constantness arrays. */
876 for (edge
= dst
->callees
; edge
; edge
= next
)
878 predicate new_predicate
;
879 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
880 next
= edge
->next_callee
;
882 if (!edge
->inline_failed
)
886 new_predicate
= es
->predicate
->remap_after_duplication
888 if (new_predicate
== false && *es
->predicate
!= false)
889 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
890 edge_set_predicate (edge
, &new_predicate
);
893 /* Remap indirect edge predicates with the same simplification as above.
894 Also copy constantness arrays. */
895 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
897 predicate new_predicate
;
898 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
899 next
= edge
->next_callee
;
901 gcc_checking_assert (edge
->inline_failed
);
904 new_predicate
= es
->predicate
->remap_after_duplication
906 if (new_predicate
== false && *es
->predicate
!= false)
908 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
909 edge_set_predicate (edge
, &new_predicate
);
911 info
->loop_iterations
912 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
915 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
917 if (info
->builtin_constant_p_parms
.length())
919 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
921 info
->builtin_constant_p_parms
= vNULL
;
922 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
923 if (!avals
.m_known_vals
[ip
])
924 info
->builtin_constant_p_parms
.safe_push (ip
);
927 /* If inliner or someone after inliner will ever start producing
928 non-trivial clones, we will get trouble with lack of information
929 about updating self sizes, because size vectors already contains
930 sizes of the callees. */
931 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
935 info
->size_time_table
= src_info
->size_time_table
.copy ();
936 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
937 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
939 info
->builtin_constant_p_parms
940 = info
->builtin_constant_p_parms
.copy ();
942 ipa_freqcounting_predicate
*f
;
943 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
945 predicate p
= *f
->predicate
;
947 set_hint_predicate (&f
->predicate
, p
);
949 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
951 predicate p
= *f
->predicate
;
953 set_hint_predicate (&f
->predicate
, p
);
956 if (!dst
->inlined_to
)
957 ipa_update_overall_fn_summary (dst
);
961 /* Hook that is called by cgraph.c when a node is duplicated. */
964 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
965 struct cgraph_edge
*dst
,
966 class ipa_call_summary
*srcinfo
,
967 class ipa_call_summary
*info
)
969 new (info
) ipa_call_summary (*srcinfo
);
970 info
->predicate
= NULL
;
971 edge_set_predicate (dst
, srcinfo
->predicate
);
972 info
->param
= srcinfo
->param
.copy ();
973 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
975 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
976 - eni_size_weights
.call_cost
);
977 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
978 - eni_time_weights
.call_cost
);
982 /* Dump edge summaries associated to NODE and recursively to all clones.
986 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
987 class ipa_fn_summary
*info
)
989 struct cgraph_edge
*edge
;
990 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
992 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
993 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
997 "%*s%s %s\n%*s freq:%4.2f",
998 indent
, "", callee
->dump_name (),
1000 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1001 indent
, "", edge
->sreal_frequency ().to_double ());
1003 if (cross_module_call_p (edge
))
1004 fprintf (f
, " cross module");
1007 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1008 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1010 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1011 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1013 fprintf (f
, " callee size:%2i stack:%2i",
1014 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1015 (int) s
->estimated_stack_size
);
1017 if (es
&& es
->predicate
)
1019 fprintf (f
, " predicate: ");
1020 es
->predicate
->dump (f
, info
->conds
);
1024 if (es
&& es
->param
.exists ())
1025 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1027 int prob
= es
->param
[i
].change_prob
;
1030 fprintf (f
, "%*s op%i is compile time invariant\n",
1032 else if (prob
!= REG_BR_PROB_BASE
)
1033 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1034 prob
* 100.0 / REG_BR_PROB_BASE
);
1035 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1036 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1039 if (!edge
->inline_failed
)
1041 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1042 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1044 (int) ipa_get_stack_frame_offset (callee
),
1045 (int) ss
->estimated_self_stack_size
);
1046 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1049 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1051 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1052 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1056 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1057 es
->call_stmt_time
);
1060 fprintf (f
, "predicate: ");
1061 es
->predicate
->dump (f
, info
->conds
);
1070 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1072 if (node
->definition
)
1074 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1075 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1080 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1081 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1082 fprintf (f
, " always_inline");
1084 fprintf (f
, " inlinable");
1085 if (s
->fp_expressions
)
1086 fprintf (f
, " fp_expression");
1087 if (s
->builtin_constant_p_parms
.length ())
1089 fprintf (f
, " builtin_constant_p_parms");
1090 for (unsigned int i
= 0;
1091 i
< s
->builtin_constant_p_parms
.length (); i
++)
1092 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1094 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1095 fprintf (f
, " self size: %i\n", ss
->self_size
);
1096 fprintf (f
, " global size: %i\n", ss
->size
);
1097 fprintf (f
, " min size: %i\n", s
->min_size
);
1098 fprintf (f
, " self stack: %i\n",
1099 (int) ss
->estimated_self_stack_size
);
1100 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1102 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1104 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1105 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1107 fprintf (f
, " size:%f, time:%f",
1108 (double) e
->size
/ ipa_fn_summary::size_scale
,
1109 e
->time
.to_double ());
1110 if (e
->exec_predicate
!= true)
1112 fprintf (f
, ", executed if:");
1113 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1115 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1117 fprintf (f
, ", nonconst if:");
1118 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1122 ipa_freqcounting_predicate
*fcp
;
1123 bool first_fcp
= true;
1124 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1128 fprintf (f
, " loop iterations:");
1131 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1132 fcp
->predicate
->dump (f
, s
->conds
);
1135 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1139 fprintf (f
, " loop strides:");
1142 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1143 fcp
->predicate
->dump (f
, s
->conds
);
1145 fprintf (f
, " calls:\n");
1146 dump_ipa_call_summary (f
, 4, node
, s
);
1150 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
1155 ipa_debug_fn_summary (struct cgraph_node
*node
)
1157 ipa_dump_fn_summary (stderr
, node
);
1161 ipa_dump_fn_summaries (FILE *f
)
1163 struct cgraph_node
*node
;
1165 FOR_EACH_DEFINED_FUNCTION (node
)
1166 if (!node
->inlined_to
)
1167 ipa_dump_fn_summary (f
, node
);
1170 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1171 boolean variable pointed to by DATA. */
1174 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1177 bool *b
= (bool *) data
;
1182 /* If OP refers to value of function parameter, return the corresponding
1183 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1184 PARM_DECL) will be stored to *SIZE_P in that case too. */
1187 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1190 /* SSA_NAME referring to parm default def? */
1191 if (TREE_CODE (op
) == SSA_NAME
1192 && SSA_NAME_IS_DEFAULT_DEF (op
)
1193 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1196 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1197 return SSA_NAME_VAR (op
);
1199 /* Non-SSA parm reference? */
1200 if (TREE_CODE (op
) == PARM_DECL
1201 && fbi
->aa_walk_budget
> 0)
1203 bool modified
= false;
1206 ao_ref_init (&refd
, op
);
1207 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1208 mark_modified
, &modified
, NULL
, NULL
,
1209 fbi
->aa_walk_budget
);
1212 fbi
->aa_walk_budget
= 0;
1215 fbi
->aa_walk_budget
-= walked
;
1219 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1226 /* If OP refers to value of function parameter, return the corresponding
1227 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1228 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1229 stored to *SIZE_P in that case too. */
1232 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1235 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1239 if (TREE_CODE (op
) == SSA_NAME
1240 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1241 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1242 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1243 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1248 /* If OP refers to a value of a function parameter or value loaded from an
1249 aggregate passed to a parameter (either by value or reference), return TRUE
1250 and store the number of the parameter to *INDEX_P, the access size into
1251 *SIZE_P, and information whether and how it has been loaded from an
1252 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1253 statement in which OP is used or loaded. */
1256 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1257 gimple
*stmt
, tree op
, int *index_p
,
1259 struct agg_position_info
*aggpos
)
1261 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1263 gcc_checking_assert (aggpos
);
1266 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1269 aggpos
->agg_contents
= false;
1270 aggpos
->by_ref
= false;
1274 if (TREE_CODE (op
) == SSA_NAME
)
1276 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1277 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1279 stmt
= SSA_NAME_DEF_STMT (op
);
1280 op
= gimple_assign_rhs1 (stmt
);
1281 if (!REFERENCE_CLASS_P (op
))
1282 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1286 aggpos
->agg_contents
= true;
1287 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1288 stmt
, op
, index_p
, &aggpos
->offset
,
1289 size_p
, &aggpos
->by_ref
);
1292 /* See if statement might disappear after inlining.
1293 0 - means not eliminated
1294 1 - half of statements goes away
1295 2 - for sure it is eliminated.
1296 We are not terribly sophisticated, basically looking for simple abstraction
1297 penalty wrappers. */
1300 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1302 enum gimple_code code
= gimple_code (stmt
);
1303 enum tree_code rhs_code
;
1313 if (gimple_num_ops (stmt
) != 2)
1316 rhs_code
= gimple_assign_rhs_code (stmt
);
1318 /* Casts of parameters, loads from parameters passed by reference
1319 and stores to return value or parameters are often free after
1320 inlining due to SRA and further combining.
1321 Assume that half of statements goes away. */
1322 if (CONVERT_EXPR_CODE_P (rhs_code
)
1323 || rhs_code
== VIEW_CONVERT_EXPR
1324 || rhs_code
== ADDR_EXPR
1325 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1327 tree rhs
= gimple_assign_rhs1 (stmt
);
1328 tree lhs
= gimple_assign_lhs (stmt
);
1329 tree inner_rhs
= get_base_address (rhs
);
1330 tree inner_lhs
= get_base_address (lhs
);
1331 bool rhs_free
= false;
1332 bool lhs_free
= false;
1339 /* Reads of parameter are expected to be free. */
1340 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1342 /* Match expressions of form &this->field. Those will most likely
1343 combine with something upstream after inlining. */
1344 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1346 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1347 if (TREE_CODE (op
) == PARM_DECL
)
1349 else if (TREE_CODE (op
) == MEM_REF
1350 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1355 /* When parameter is not SSA register because its address is taken
1356 and it is just copied into one, the statement will be completely
1357 free after inlining (we will copy propagate backward). */
1358 if (rhs_free
&& is_gimple_reg (lhs
))
1361 /* Reads of parameters passed by reference
1362 expected to be free (i.e. optimized out after inlining). */
1363 if (TREE_CODE (inner_rhs
) == MEM_REF
1364 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1367 /* Copying parameter passed by reference into gimple register is
1368 probably also going to copy propagate, but we can't be quite
1370 if (rhs_free
&& is_gimple_reg (lhs
))
1373 /* Writes to parameters, parameters passed by value and return value
1374 (either directly or passed via invisible reference) are free.
1376 TODO: We ought to handle testcase like
1377 struct a {int a,b;};
1385 This translate into:
1400 For that we either need to copy ipa-split logic detecting writes
1402 if (TREE_CODE (inner_lhs
) == PARM_DECL
1403 || TREE_CODE (inner_lhs
) == RESULT_DECL
1404 || (TREE_CODE (inner_lhs
) == MEM_REF
1405 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1407 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1408 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1409 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1411 0))) == RESULT_DECL
))))
1414 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1416 if (lhs_free
&& rhs_free
)
1425 /* Analyze EXPR if it represents a series of simple operations performed on
1426 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1427 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1428 Type of the parameter or load from an aggregate via the parameter is
1429 stored in *TYPE_P. Operations on the parameter are recorded to
1430 PARAM_OPS_P if it is not NULL. */
1433 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1434 gimple
*stmt
, tree expr
,
1435 int *index_p
, tree
*type_p
,
1436 struct agg_position_info
*aggpos
,
1437 expr_eval_ops
*param_ops_p
= NULL
)
1439 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1443 *param_ops_p
= NULL
;
1447 expr_eval_op eval_op
;
1449 unsigned cst_count
= 0;
1451 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1454 tree type
= TREE_TYPE (expr
);
1456 if (aggpos
->agg_contents
)
1458 /* Stop if containing bit-field. */
1459 if (TREE_CODE (expr
) == BIT_FIELD_REF
1460 || contains_bitfld_component_ref_p (expr
))
1468 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1471 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1474 switch (gimple_assign_rhs_class (stmt
))
1476 case GIMPLE_SINGLE_RHS
:
1477 expr
= gimple_assign_rhs1 (stmt
);
1480 case GIMPLE_UNARY_RHS
:
1484 case GIMPLE_BINARY_RHS
:
1488 case GIMPLE_TERNARY_RHS
:
1496 /* Stop if expression is too complex. */
1497 if (op_count
++ == op_limit
)
1502 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1503 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1504 eval_op
.val
[0] = NULL_TREE
;
1505 eval_op
.val
[1] = NULL_TREE
;
1509 for (unsigned i
= 0; i
< rhs_count
; i
++)
1511 tree op
= gimple_op (stmt
, i
+ 1);
1513 gcc_assert (op
&& !TYPE_P (op
));
1514 if (is_gimple_ip_invariant (op
))
1516 if (++cst_count
== rhs_count
)
1519 eval_op
.val
[cst_count
- 1] = op
;
1523 /* Found a non-constant operand, and record its index in rhs
1530 /* Found more than one non-constant operands. */
1536 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1539 /* Failed to decompose, free resource and return. */
1542 vec_free (*param_ops_p
);
1547 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1550 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1554 /* Avoid duplicates. */
1555 for (unsigned int i
= 0;
1556 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1559 summary
->builtin_constant_p_parms
.safe_push (parm
);
1562 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1563 predicates to the CFG edges. */
1566 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1567 class ipa_fn_summary
*summary
,
1568 class ipa_node_params
*params_summary
,
1574 struct agg_position_info aggpos
;
1575 enum tree_code code
, inverted_code
;
1580 expr_eval_ops param_ops
;
1582 last
= last_stmt (bb
);
1583 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1585 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1587 op
= gimple_cond_lhs (last
);
1589 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1592 code
= gimple_cond_code (last
);
1593 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1595 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1597 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1598 ? code
: inverted_code
);
1599 /* invert_tree_comparison will return ERROR_MARK on FP
1600 comparisons that are not EQ/NE instead of returning proper
1601 unordered one. Be sure it is not confused with NON_CONSTANT.
1603 And if the edge's target is the final block of diamond CFG graph
1604 of this conditional statement, we do not need to compute
1605 predicate for the edge because the final block's predicate must
1606 be at least as that of the first block of the statement. */
1607 if (this_code
!= ERROR_MARK
1608 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1611 = add_condition (summary
, params_summary
, index
,
1612 param_type
, &aggpos
,
1613 this_code
, gimple_cond_rhs (last
), param_ops
);
1614 e
->aux
= edge_predicate_pool
.allocate ();
1615 *(predicate
*) e
->aux
= p
;
1618 vec_free (param_ops
);
1621 if (TREE_CODE (op
) != SSA_NAME
)
1624 if (builtin_constant_p (op))
1628 Here we can predicate nonconstant_code. We can't
1629 really handle constant_code since we have no predicate
1630 for this and also the constant code is not known to be
1631 optimized away when inliner doesn't see operand is constant.
1632 Other optimizers might think otherwise. */
1633 if (gimple_cond_code (last
) != NE_EXPR
1634 || !integer_zerop (gimple_cond_rhs (last
)))
1636 set_stmt
= SSA_NAME_DEF_STMT (op
);
1637 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1638 || gimple_call_num_args (set_stmt
) != 1)
1640 op2
= gimple_call_arg (set_stmt
, 0);
1641 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1644 add_builtin_constant_p_parm (summary
, index
);
1645 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1647 predicate p
= add_condition (summary
, params_summary
, index
,
1648 param_type
, &aggpos
,
1649 predicate::is_not_constant
, NULL_TREE
);
1650 e
->aux
= edge_predicate_pool
.allocate ();
1651 *(predicate
*) e
->aux
= p
;
1656 /* If BB ends by a switch we can turn into predicates, attach corresponding
1657 predicates to the CFG edges. */
1660 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1661 class ipa_fn_summary
*summary
,
1662 class ipa_node_params
*params_summary
,
1668 struct agg_position_info aggpos
;
1674 expr_eval_ops param_ops
;
1676 lastg
= last_stmt (bb
);
1677 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1679 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1680 op
= gimple_switch_index (last
);
1681 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1685 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1686 tree type
= TREE_TYPE (op
);
1687 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1688 param_ipa_max_switch_predicate_bounds
);
1689 int bound_count
= 0;
1690 wide_int vr_wmin
, vr_wmax
;
1691 value_range_kind vr_type
= get_range_info (op
, &vr_wmin
, &vr_wmax
);
1693 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1695 e
->aux
= edge_predicate_pool
.allocate ();
1696 *(predicate
*) e
->aux
= false;
1699 e
= gimple_switch_edge (cfun
, last
, 0);
1700 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1701 default case if its target basic block is in convergence point of all
1702 switch cases, which can be determined by checking whether it
1703 post-dominates the switch statement. */
1704 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1705 bound_count
= INT_MAX
;
1707 n
= gimple_switch_num_labels (last
);
1708 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1710 tree cl
= gimple_switch_label (last
, case_idx
);
1711 tree min
= CASE_LOW (cl
);
1712 tree max
= CASE_HIGH (cl
);
1715 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1717 /* The case value might not have same type as switch expression,
1718 extend the value based on the expression type. */
1719 if (TREE_TYPE (min
) != type
)
1720 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1724 else if (TREE_TYPE (max
) != type
)
1725 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1727 /* The case's target basic block is in convergence point of all switch
1728 cases, its predicate should be at least as that of the switch
1730 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1732 else if (min
== max
)
1733 p
= add_condition (summary
, params_summary
, index
, param_type
,
1734 &aggpos
, EQ_EXPR
, min
, param_ops
);
1738 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1739 &aggpos
, GE_EXPR
, min
, param_ops
);
1740 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1741 &aggpos
, LE_EXPR
, max
, param_ops
);
1744 *(class predicate
*) e
->aux
1745 = p
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1747 /* If there are too many disjoint case ranges, predicate for default
1748 case might become too complicated. So add a limit here. */
1749 if (bound_count
> bound_limit
)
1752 bool new_range
= true;
1754 if (!ranges
.is_empty ())
1756 wide_int curr_wmin
= wi::to_wide (min
);
1757 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1759 /* Merge case ranges if they are continuous. */
1760 if (curr_wmin
== last_wmax
+ 1)
1762 else if (vr_type
== VR_ANTI_RANGE
)
1764 /* If two disjoint case ranges can be connected by anti-range
1765 of switch index, combine them to one range. */
1766 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1767 vr_type
= VR_UNDEFINED
;
1768 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1773 /* Create/extend a case range. And we count endpoints of range set,
1774 this number nearly equals to number of conditions that we will create
1775 for predicate of default case. */
1778 bound_count
+= (min
== max
) ? 1 : 2;
1779 ranges
.safe_push (std::make_pair (min
, max
));
1783 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1784 ranges
.last ().second
= max
;
1788 e
= gimple_switch_edge (cfun
, last
, 0);
1789 if (bound_count
> bound_limit
)
1791 *(class predicate
*) e
->aux
= true;
1792 vec_free (param_ops
);
1796 predicate p_seg
= true;
1797 predicate p_all
= false;
1799 if (vr_type
!= VR_RANGE
)
1801 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1802 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1805 /* Construct predicate to represent default range set that is negation of
1806 all case ranges. Case range is classified as containing single/non-single
1807 values. Suppose a piece of case ranges in the following.
1809 [D1...D2] [S1] ... [Sn] [D3...D4]
1811 To represent default case's range sets between two non-single value
1812 case ranges (From D2 to D3), we construct predicate as:
1814 D2 < x < D3 && x != S1 && ... && x != Sn
1816 for (size_t i
= 0; i
< ranges
.length (); i
++)
1818 tree min
= ranges
[i
].first
;
1819 tree max
= ranges
[i
].second
;
1822 p_seg
&= add_condition (summary
, params_summary
, index
,
1823 param_type
, &aggpos
, NE_EXPR
,
1827 /* Do not create sub-predicate for range that is beyond low bound
1829 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1831 p_seg
&= add_condition (summary
, params_summary
, index
,
1832 param_type
, &aggpos
,
1833 LT_EXPR
, min
, param_ops
);
1834 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1837 /* Do not create sub-predicate for range that is beyond up bound
1839 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1845 p_seg
= add_condition (summary
, params_summary
, index
,
1846 param_type
, &aggpos
, GT_EXPR
,
1851 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1852 *(class predicate
*) e
->aux
1853 = p_all
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1855 vec_free (param_ops
);
1859 /* For each BB in NODE attach to its AUX pointer predicate under
1860 which it is executable. */
1863 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1864 struct cgraph_node
*node
,
1865 class ipa_fn_summary
*summary
,
1866 class ipa_node_params
*params_summary
)
1868 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1872 FOR_EACH_BB_FN (bb
, my_function
)
1874 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1875 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1878 /* Entry block is always executable. */
1879 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1880 = edge_predicate_pool
.allocate ();
1881 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1883 /* A simple dataflow propagation of predicates forward in the CFG.
1884 TODO: work in reverse postorder. */
1888 FOR_EACH_BB_FN (bb
, my_function
)
1890 predicate p
= false;
1893 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1897 predicate this_bb_predicate
1898 = *(predicate
*) e
->src
->aux
;
1900 this_bb_predicate
&= (*(class predicate
*) e
->aux
);
1901 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1908 basic_block pdom_bb
;
1913 bb
->aux
= edge_predicate_pool
.allocate ();
1914 *((predicate
*) bb
->aux
) = p
;
1916 else if (p
!= *(predicate
*) bb
->aux
)
1918 /* This OR operation is needed to ensure monotonous data flow
1919 in the case we hit the limit on number of clauses and the
1920 and/or operations above give approximate answers. */
1921 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1922 if (p
!= *(predicate
*) bb
->aux
)
1925 *((predicate
*) bb
->aux
) = p
;
1929 /* For switch/if statement, we can OR-combine predicates of all
1930 its cases/branches to get predicate for basic block in their
1931 convergence point, but sometimes this will generate very
1932 complicated predicate. Actually, we can get simplified
1933 predicate in another way by using the fact that predicate
1934 for a basic block must also hold true for its post dominators.
1935 To be specific, basic block in convergence point of
1936 conditional statement should include predicate of the
1938 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1939 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1941 else if (!pdom_bb
->aux
)
1944 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1945 *((predicate
*) pdom_bb
->aux
) = p
;
1947 else if (p
!= *(predicate
*) pdom_bb
->aux
)
1949 p
= p
.or_with (summary
->conds
, *(predicate
*)pdom_bb
->aux
);
1950 if (p
!= *(predicate
*) pdom_bb
->aux
)
1953 *((predicate
*) pdom_bb
->aux
) = p
;
1962 /* Return predicate specifying when the STMT might have result that is not
1963 a compile time constant. */
1966 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1967 class ipa_fn_summary
*summary
,
1968 class ipa_node_params
*params_summary
,
1970 vec
<predicate
> nonconstant_names
)
1975 while (UNARY_CLASS_P (expr
))
1976 expr
= TREE_OPERAND (expr
, 0);
1978 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1979 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1980 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
1981 predicate::changed
, NULL_TREE
);
1982 if (is_gimple_min_invariant (expr
))
1984 if (TREE_CODE (expr
) == SSA_NAME
)
1985 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1986 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1989 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1991 TREE_OPERAND (expr
, 0),
1997 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1999 TREE_OPERAND (expr
, 1),
2001 return p1
.or_with (summary
->conds
, p2
);
2003 else if (TREE_CODE (expr
) == COND_EXPR
)
2006 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2008 TREE_OPERAND (expr
, 0),
2014 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2016 TREE_OPERAND (expr
, 1),
2020 p1
= p1
.or_with (summary
->conds
, p2
);
2021 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2023 TREE_OPERAND (expr
, 2),
2025 return p2
.or_with (summary
->conds
, p1
);
2027 else if (TREE_CODE (expr
) == CALL_EXPR
)
2038 /* Return predicate specifying when the STMT might have result that is not
2039 a compile time constant. */
2042 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2043 class ipa_fn_summary
*summary
,
2044 class ipa_node_params
*params_summary
,
2046 vec
<predicate
> nonconstant_names
)
2051 tree param_type
= NULL_TREE
;
2052 predicate op_non_const
;
2055 struct agg_position_info aggpos
;
2057 /* What statements might be optimized away
2058 when their arguments are constant. */
2059 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2060 && gimple_code (stmt
) != GIMPLE_COND
2061 && gimple_code (stmt
) != GIMPLE_SWITCH
2062 && (gimple_code (stmt
) != GIMPLE_CALL
2063 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2066 /* Stores will stay anyway. */
2067 if (gimple_store_p (stmt
))
2070 is_load
= gimple_assign_load_p (stmt
);
2072 /* Loads can be optimized when the value is known. */
2075 tree op
= gimple_assign_rhs1 (stmt
);
2076 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2083 /* See if we understand all operands before we start
2084 adding conditionals. */
2085 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2087 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2088 /* For arguments we can build a condition. */
2089 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2091 if (TREE_CODE (use
) != SSA_NAME
)
2093 /* If we know when operand is constant,
2094 we still can say something useful. */
2095 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2102 add_condition (summary
, params_summary
,
2103 base_index
, param_type
, &aggpos
,
2104 predicate::changed
, NULL_TREE
);
2106 op_non_const
= false;
2107 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2109 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2112 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2114 if (index
!= base_index
)
2115 p
= add_condition (summary
, params_summary
, index
,
2116 TREE_TYPE (parm
), NULL
,
2117 predicate::changed
, NULL_TREE
);
2122 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2123 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2125 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2126 && gimple_op (stmt
, 0)
2127 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2128 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2130 return op_non_const
;
2133 struct record_modified_bb_info
2140 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2141 probability how often it changes between USE_BB.
2142 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2143 is in different loop nest, we can do better.
2144 This is all just estimate. In theory we look for minimal cut separating
2145 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2149 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2151 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2152 if (l
&& l
->header
->count
< init_bb
->count
)
2157 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2158 set except for info->stmt. */
2161 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2163 struct record_modified_bb_info
*info
=
2164 (struct record_modified_bb_info
*) data
;
2165 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2167 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2169 bitmap_set_bit (info
->bb_set
,
2170 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2171 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2173 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2174 gimple_bb (info
->stmt
))->index
);
2177 fprintf (dump_file
, " Param ");
2178 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2179 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2180 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2182 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2183 gimple_bb (info
->stmt
))->index
);
2184 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2189 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2190 will change since last invocation of STMT.
2192 Value 0 is reserved for compile time invariants.
2193 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2194 ought to be REG_BR_PROB_BASE / estimated_iters. */
2197 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2199 tree op
= gimple_call_arg (stmt
, i
);
2200 basic_block bb
= gimple_bb (stmt
);
2202 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2203 op
= TREE_OPERAND (op
, 0);
2205 tree base
= get_base_address (op
);
2207 /* Global invariants never change. */
2208 if (is_gimple_min_invariant (base
))
2211 /* We would have to do non-trivial analysis to really work out what
2212 is the probability of value to change (i.e. when init statement
2213 is in a sibling loop of the call).
2215 We do an conservative estimate: when call is executed N times more often
2216 than the statement defining value, we take the frequency 1/N. */
2217 if (TREE_CODE (base
) == SSA_NAME
)
2219 profile_count init_count
;
2221 if (!bb
->count
.nonzero_p ())
2222 return REG_BR_PROB_BASE
;
2224 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2225 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2227 init_count
= get_minimal_bb
2228 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2229 gimple_bb (stmt
))->count
;
2231 if (init_count
< bb
->count
)
2232 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2233 * REG_BR_PROB_BASE
).to_int (), 1);
2234 return REG_BR_PROB_BASE
;
2239 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2240 struct record_modified_bb_info info
;
2241 tree init
= ctor_for_folding (base
);
2243 if (init
!= error_mark_node
)
2245 if (!bb
->count
.nonzero_p () || fbi
->aa_walk_budget
== 0)
2246 return REG_BR_PROB_BASE
;
2249 fprintf (dump_file
, " Analyzing param change probability of ");
2250 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2251 fprintf (dump_file
, "\n");
2253 ao_ref_init (&refd
, op
);
2256 info
.bb_set
= BITMAP_ALLOC (NULL
);
2258 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2259 NULL
, NULL
, fbi
->aa_walk_budget
);
2261 fbi
->aa_walk_budget
-= walked
;
2262 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2265 fbi
->aa_walk_budget
= 0;
2269 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2271 fprintf (dump_file
, " Set in same BB as used.\n");
2273 BITMAP_FREE (info
.bb_set
);
2274 return REG_BR_PROB_BASE
;
2279 /* Lookup the most frequent update of the value and believe that
2280 it dominates all the other; precise analysis here is difficult. */
2281 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2282 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2285 fprintf (dump_file
, " Set with count ");
2286 max
.dump (dump_file
);
2287 fprintf (dump_file
, " and used with count ");
2288 bb
->count
.dump (dump_file
);
2289 fprintf (dump_file
, " freq %f\n",
2290 max
.to_sreal_scale (bb
->count
).to_double ());
2293 BITMAP_FREE (info
.bb_set
);
2294 if (max
< bb
->count
)
2295 return MAX ((max
.to_sreal_scale (bb
->count
)
2296 * REG_BR_PROB_BASE
).to_int (), 1);
2297 return REG_BR_PROB_BASE
;
2301 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2302 sub-graph and if the predicate the condition depends on is known. If so,
2303 return true and store the pointer the predicate in *P. */
2306 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2307 ipa_fn_summary
*summary
,
2308 class ipa_node_params
*params_summary
,
2311 vec
<predicate
> nonconstant_names
)
2315 basic_block first_bb
= NULL
;
2318 if (single_pred_p (bb
))
2324 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2326 if (single_succ_p (e
->src
))
2328 if (!single_pred_p (e
->src
))
2331 first_bb
= single_pred (e
->src
);
2332 else if (single_pred (e
->src
) != first_bb
)
2339 else if (e
->src
!= first_bb
)
2347 stmt
= last_stmt (first_bb
);
2349 || gimple_code (stmt
) != GIMPLE_COND
2350 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2353 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2354 gimple_cond_lhs (stmt
),
2362 /* Given a PHI statement in a function described by inline properties SUMMARY
2363 and *P being the predicate describing whether the selected PHI argument is
2364 known, store a predicate for the result of the PHI statement into
2365 NONCONSTANT_NAMES, if possible. */
2368 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2370 vec
<predicate
> nonconstant_names
)
2374 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2376 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2377 if (!is_gimple_min_invariant (arg
))
2379 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2380 *p
= p
->or_with (summary
->conds
,
2381 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2387 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2389 fprintf (dump_file
, "\t\tphi predicate: ");
2390 p
->dump (dump_file
, summary
->conds
);
2392 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2395 /* For a typical usage of __builtin_expect (a<b, 1), we
2396 may introduce an extra relation stmt:
2397 With the builtin, we have
2400 t3 = __builtin_expect (t2, 1);
2403 Without the builtin, we have
2406 This affects the size/time estimation and may have
2407 an impact on the earlier inlining.
2408 Here find this pattern and fix it up later. */
2411 find_foldable_builtin_expect (basic_block bb
)
2413 gimple_stmt_iterator bsi
;
2415 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2417 gimple
*stmt
= gsi_stmt (bsi
);
2418 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2419 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2420 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2422 tree var
= gimple_call_lhs (stmt
);
2423 tree arg
= gimple_call_arg (stmt
, 0);
2424 use_operand_p use_p
;
2431 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2433 while (TREE_CODE (arg
) == SSA_NAME
)
2435 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2436 if (!is_gimple_assign (stmt_tmp
))
2438 switch (gimple_assign_rhs_code (stmt_tmp
))
2457 arg
= gimple_assign_rhs1 (stmt_tmp
);
2460 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2461 && gimple_code (use_stmt
) == GIMPLE_COND
)
2468 /* Return true when the basic blocks contains only clobbers followed by RESX.
2469 Such BBs are kept around to make removal of dead stores possible with
2470 presence of EH and will be optimized out by optimize_clobbers later in the
2473 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2474 that can be clobber only, too.. When it is false, the RESX is not necessary
2475 on the end of basic block. */
2478 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2480 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2486 if (gsi_end_p (gsi
))
2488 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2492 else if (!single_succ_p (bb
))
2495 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2497 gimple
*stmt
= gsi_stmt (gsi
);
2498 if (is_gimple_debug (stmt
))
2500 if (gimple_clobber_p (stmt
))
2502 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2507 /* See if all predecessors are either throws or clobber only BBs. */
2508 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2509 if (!(e
->flags
& EDGE_EH
)
2510 && !clobber_only_eh_bb_p (e
->src
, false))
2516 /* Return true if STMT compute a floating point expression that may be affected
2517 by -ffast-math and similar flags. */
2520 fp_expression_p (gimple
*stmt
)
2525 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2526 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2531 /* Return true if T references memory location that is local
2532 for the function (that means, dead after return) or read-only. */
2535 refs_local_or_readonly_memory_p (tree t
)
2537 /* Non-escaping memory is fine. */
2538 t
= get_base_address (t
);
2539 if ((TREE_CODE (t
) == MEM_REF
2540 || TREE_CODE (t
) == TARGET_MEM_REF
))
2541 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2543 /* Automatic variables are fine. */
2545 && auto_var_in_fn_p (t
, current_function_decl
))
2548 /* Read-only variables are fine. */
2549 if (DECL_P (t
) && TREE_READONLY (t
))
2555 /* Return true if T is a pointer pointing to memory location that is local
2556 for the function (that means, dead after return) or read-only. */
2559 points_to_local_or_readonly_memory_p (tree t
)
2561 /* See if memory location is clearly invalid. */
2562 if (integer_zerop (t
))
2563 return flag_delete_null_pointer_checks
;
2564 if (TREE_CODE (t
) == SSA_NAME
)
2565 return !ptr_deref_may_alias_global_p (t
);
2566 if (TREE_CODE (t
) == ADDR_EXPR
)
2567 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2572 /* Analyze function body for NODE.
2573 EARLY indicates run from early optimization pipeline. */
2576 analyze_function_body (struct cgraph_node
*node
, bool early
)
2578 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2579 /* Estimate static overhead for function prologue/epilogue and alignment. */
2580 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2581 /* Benefits are scaled by probability of elimination that is in range
2584 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2586 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2587 class ipa_node_params
*params_summary
= early
? NULL
: IPA_NODE_REF (node
);
2588 predicate bb_predicate
;
2589 struct ipa_func_body_info fbi
;
2590 vec
<predicate
> nonconstant_names
= vNULL
;
2593 gimple
*fix_builtin_expect_stmt
;
2595 gcc_assert (my_function
&& my_function
->cfg
);
2596 gcc_assert (cfun
== my_function
);
2598 memset(&fbi
, 0, sizeof(fbi
));
2599 vec_free (info
->conds
);
2601 info
->size_time_table
.release ();
2602 info
->call_size_time_table
.release ();
2604 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2605 so we can produce proper inline hints.
2607 When optimizing and analyzing for early inliner, initialize node params
2608 so we can produce correct BB predicates. */
2610 if (opt_for_fn (node
->decl
, optimize
))
2612 calculate_dominance_info (CDI_DOMINATORS
);
2613 calculate_dominance_info (CDI_POST_DOMINATORS
);
2615 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2618 ipa_check_create_node_params ();
2619 ipa_initialize_node_params (node
);
2622 if (ipa_node_params_sum
)
2625 fbi
.info
= IPA_NODE_REF (node
);
2626 fbi
.bb_infos
= vNULL
;
2627 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2628 fbi
.param_count
= count_formal_params (node
->decl
);
2629 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2631 nonconstant_names
.safe_grow_cleared
2632 (SSANAMES (my_function
)->length (), true);
2637 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2638 node
->dump_name ());
2640 /* When we run into maximal number of entries, we assign everything to the
2641 constant truth case. Be sure to have it in list. */
2642 bb_predicate
= true;
2643 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2645 bb_predicate
= predicate::not_inlined ();
2646 info
->account_size_time (opt_for_fn (node
->decl
,
2647 param_uninlined_function_insns
)
2648 * ipa_fn_summary::size_scale
,
2649 opt_for_fn (node
->decl
,
2650 param_uninlined_function_time
),
2655 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2656 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2657 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2658 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2659 for (n
= 0; n
< nblocks
; n
++)
2661 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2662 freq
= bb
->count
.to_sreal_scale (entry_count
);
2663 if (clobber_only_eh_bb_p (bb
))
2665 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2666 fprintf (dump_file
, "\n Ignoring BB %i;"
2667 " it will be optimized away by cleanup_clobbers\n",
2672 /* TODO: Obviously predicates can be propagated down across CFG. */
2676 bb_predicate
= *(predicate
*) bb
->aux
;
2678 bb_predicate
= false;
2681 bb_predicate
= true;
2683 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2685 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2686 bb_predicate
.dump (dump_file
, info
->conds
);
2689 if (fbi
.info
&& nonconstant_names
.exists ())
2691 predicate phi_predicate
;
2692 bool first_phi
= true;
2694 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2698 && !phi_result_unknown_predicate (&fbi
, info
,
2705 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2707 fprintf (dump_file
, " ");
2708 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2710 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2715 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2717 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2718 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2720 gimple
*stmt
= gsi_stmt (bsi
);
2721 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2722 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2724 predicate will_be_nonconstant
;
2726 /* This relation stmt should be folded after we remove
2727 __builtin_expect call. Adjust the cost here. */
2728 if (stmt
== fix_builtin_expect_stmt
)
2734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2736 fprintf (dump_file
, " ");
2737 print_gimple_stmt (dump_file
, stmt
, 0);
2738 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2739 freq
.to_double (), this_size
,
2743 if (is_gimple_call (stmt
)
2744 && !gimple_call_internal_p (stmt
))
2746 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2747 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2749 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2750 resolved as constant. We however don't want to optimize
2751 out the cgraph edges. */
2752 if (nonconstant_names
.exists ()
2753 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2754 && gimple_call_lhs (stmt
)
2755 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2757 predicate false_p
= false;
2758 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2761 if (ipa_node_params_sum
)
2763 int count
= gimple_call_num_args (stmt
);
2767 es
->param
.safe_grow_cleared (count
, true);
2768 for (i
= 0; i
< count
; i
++)
2770 int prob
= param_change_prob (&fbi
, stmt
, i
);
2771 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2772 es
->param
[i
].change_prob
= prob
;
2773 es
->param
[i
].points_to_local_or_readonly_memory
2774 = points_to_local_or_readonly_memory_p
2775 (gimple_call_arg (stmt
, i
));
2778 /* We cannot setup VLA parameters during inlining. */
2779 for (unsigned int i
= 0; i
< gimple_call_num_args (stmt
); ++i
)
2780 if (TREE_CODE (gimple_call_arg (stmt
, i
)) == WITH_SIZE_EXPR
)
2782 edge
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
2785 es
->call_stmt_size
= this_size
;
2786 es
->call_stmt_time
= this_time
;
2787 es
->loop_depth
= bb_loop_depth (bb
);
2788 edge_set_predicate (edge
, &bb_predicate
);
2789 if (edge
->speculative
)
2791 cgraph_edge
*indirect
2792 = edge
->speculative_call_indirect_edge ();
2793 ipa_call_summary
*es2
2794 = ipa_call_summaries
->get_create (indirect
);
2795 ipa_call_summaries
->duplicate (edge
, indirect
,
2798 /* Edge is the first direct call.
2799 create and duplicate call summaries for multiple
2800 speculative call targets. */
2801 for (cgraph_edge
*direct
2802 = edge
->next_speculative_call_target ();
2804 direct
= direct
->next_speculative_call_target ())
2806 ipa_call_summary
*es3
2807 = ipa_call_summaries
->get_create (direct
);
2808 ipa_call_summaries
->duplicate (edge
, direct
,
2814 /* TODO: When conditional jump or switch is known to be constant, but
2815 we did not translate it into the predicates, we really can account
2816 just maximum of the possible paths. */
2819 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2820 stmt
, nonconstant_names
);
2822 will_be_nonconstant
= true;
2823 if (this_time
|| this_size
)
2825 sreal final_time
= (sreal
)this_time
* freq
;
2827 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2828 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2830 "\t\t50%% will be eliminated by inlining\n");
2831 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2832 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2834 class predicate p
= bb_predicate
& will_be_nonconstant
;
2836 /* We can ignore statement when we proved it is never going
2837 to happen, but we cannot do that for call statements
2838 because edges are accounted specially. */
2840 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2846 /* We account everything but the calls. Calls have their own
2847 size/time info attached to cgraph edges. This is necessary
2848 in order to make the cost disappear after inlining. */
2849 if (!is_gimple_call (stmt
))
2853 predicate ip
= bb_predicate
& predicate::not_inlined ();
2854 info
->account_size_time (this_size
* prob
,
2855 (final_time
* prob
) / 2, ip
,
2859 info
->account_size_time (this_size
* (2 - prob
),
2860 (final_time
* (2 - prob
) / 2),
2865 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2867 info
->fp_expressions
= true;
2869 fprintf (dump_file
, " fp_expression set\n");
2873 /* Account cost of address calculations in the statements. */
2874 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2876 for (tree op
= gimple_op (stmt
, i
);
2877 op
&& handled_component_p (op
);
2878 op
= TREE_OPERAND (op
, 0))
2879 if ((TREE_CODE (op
) == ARRAY_REF
2880 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2881 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2883 predicate p
= bb_predicate
;
2885 p
= p
& will_be_nonconstant_expr_predicate
2886 (&fbi
, info
, params_summary
,
2887 TREE_OPERAND (op
, 1),
2895 "\t\tAccounting address calculation.\n");
2896 info
->account_size_time (ipa_fn_summary::size_scale
,
2908 if (nonconstant_names
.exists () && !early
)
2910 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2912 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
2913 param_ipa_max_loop_predicates
);
2915 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2916 flow_loops_dump (dump_file
, NULL
, 0);
2918 FOR_EACH_LOOP (loop
, 0)
2920 predicate loop_iterations
= true;
2924 class tree_niter_desc niter_desc
;
2925 if (!loop
->header
->aux
)
2928 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2929 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2931 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2932 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
2933 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2934 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2935 && !is_gimple_min_invariant (niter_desc
.niter
))
2937 predicate will_be_nonconstant
2938 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2942 if (will_be_nonconstant
!= true)
2943 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2944 if (will_be_nonconstant
!= true
2945 && will_be_nonconstant
!= false)
2946 loop_iterations
&= will_be_nonconstant
;
2948 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
2949 phdr_freq
, max_loop_predicates
);
2952 /* To avoid quadratic behavior we analyze stride predicates only
2953 with respect to the containing loop. Thus we simply iterate
2954 over all defs in the outermost loop body. */
2955 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2956 loop
!= NULL
; loop
= loop
->next
)
2958 predicate loop_stride
= true;
2959 basic_block
*body
= get_loop_body (loop
);
2960 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2961 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2962 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2964 gimple_stmt_iterator gsi
;
2968 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2969 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2972 gimple
*stmt
= gsi_stmt (gsi
);
2974 if (!is_gimple_assign (stmt
))
2977 tree def
= gimple_assign_lhs (stmt
);
2978 if (TREE_CODE (def
) != SSA_NAME
)
2982 if (!simple_iv (loop_containing_stmt (stmt
),
2983 loop_containing_stmt (stmt
),
2985 || is_gimple_min_invariant (iv
.step
))
2988 predicate will_be_nonconstant
2989 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2993 if (will_be_nonconstant
!= true)
2994 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2995 if (will_be_nonconstant
!= true
2996 && will_be_nonconstant
!= false)
2997 loop_stride
= loop_stride
& will_be_nonconstant
;
3000 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
3001 phdr_freq
, max_loop_predicates
);
3006 FOR_ALL_BB_FN (bb
, my_function
)
3012 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
3014 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3017 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
3021 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3022 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3024 ss
->self_size
= size
;
3025 nonconstant_names
.release ();
3026 ipa_release_body_info (&fbi
);
3027 if (opt_for_fn (node
->decl
, optimize
))
3030 loop_optimizer_finalize ();
3031 else if (!ipa_edge_args_sum
)
3032 ipa_free_all_node_params ();
3033 free_dominance_info (CDI_DOMINATORS
);
3034 free_dominance_info (CDI_POST_DOMINATORS
);
3038 fprintf (dump_file
, "\n");
3039 ipa_dump_fn_summary (dump_file
, node
);
3044 /* Compute function summary.
3045 EARLY is true when we compute parameters during early opts. */
3048 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3050 HOST_WIDE_INT self_stack_size
;
3051 struct cgraph_edge
*e
;
3053 gcc_assert (!node
->inlined_to
);
3055 if (!ipa_fn_summaries
)
3056 ipa_fn_summary_alloc ();
3058 /* Create a new ipa_fn_summary. */
3059 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3060 ipa_fn_summaries
->remove (node
);
3061 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3062 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3064 /* Estimate the stack size for the function if we're optimizing. */
3065 self_stack_size
= optimize
&& !node
->thunk
3066 ? estimated_stack_frame_size (node
) : 0;
3067 size_info
->estimated_self_stack_size
= self_stack_size
;
3068 info
->estimated_stack_size
= self_stack_size
;
3072 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3075 node
->can_change_signature
= false;
3076 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3077 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3078 info
->account_size_time (ipa_fn_summary::size_scale
3079 * opt_for_fn (node
->decl
,
3080 param_uninlined_function_thunk_insns
),
3081 opt_for_fn (node
->decl
,
3082 param_uninlined_function_thunk_time
), t
, t
);
3083 t
= predicate::not_inlined ();
3084 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3085 ipa_update_overall_fn_summary (node
);
3086 size_info
->self_size
= size_info
->size
;
3087 if (stdarg_p (TREE_TYPE (node
->decl
)))
3089 info
->inlinable
= false;
3090 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3093 info
->inlinable
= true;
3097 /* Even is_gimple_min_invariant rely on current_function_decl. */
3098 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3100 /* During IPA profile merging we may be called w/o virtual SSA form
3102 update_ssa (TODO_update_ssa_only_virtuals
);
3104 /* Can this function be inlined at all? */
3105 if (!opt_for_fn (node
->decl
, optimize
)
3106 && !lookup_attribute ("always_inline",
3107 DECL_ATTRIBUTES (node
->decl
)))
3108 info
->inlinable
= false;
3110 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3112 /* Type attributes can use parameter indices to describe them. */
3113 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
3114 /* Likewise for #pragma omp declare simd functions or functions
3115 with simd attribute. */
3116 || lookup_attribute ("omp declare simd",
3117 DECL_ATTRIBUTES (node
->decl
)))
3118 node
->can_change_signature
= false;
3121 /* Otherwise, inlinable functions always can change signature. */
3122 if (info
->inlinable
)
3123 node
->can_change_signature
= true;
3126 /* Functions calling builtin_apply cannot change signature. */
3127 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3129 tree
cdecl = e
->callee
->decl
;
3130 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
3131 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
3134 node
->can_change_signature
= !e
;
3137 analyze_function_body (node
, early
);
3141 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3142 size_info
->size
= size_info
->self_size
;
3143 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3145 /* Code above should compute exactly the same result as
3146 ipa_update_overall_fn_summary except for case when speculative
3147 edges are present since these are accounted to size but not
3148 self_size. Do not compare time since different order the roundoff
3149 errors result in slight changes. */
3150 ipa_update_overall_fn_summary (node
);
3153 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3156 gcc_assert (e
|| size_info
->size
== size_info
->self_size
);
3161 /* Compute parameters of functions used by inliner using
3162 current_function_decl. */
3165 compute_fn_summary_for_current (void)
3167 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3171 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3172 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3173 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3176 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3177 int *size
, int *time
,
3178 ipa_call_arg_values
*avals
)
3181 struct cgraph_node
*callee
;
3182 class ipa_fn_summary
*isummary
;
3183 enum availability avail
;
3187 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3189 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3192 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3193 if (!target
|| speculative
)
3196 /* Account for difference in cost between indirect and direct calls. */
3197 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3198 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3199 gcc_checking_assert (*time
>= 0);
3200 gcc_checking_assert (*size
>= 0);
3202 callee
= cgraph_node::get (target
);
3203 if (!callee
|| !callee
->definition
)
3205 callee
= callee
->function_symbol (&avail
);
3206 if (avail
< AVAIL_AVAILABLE
)
3208 isummary
= ipa_fn_summaries
->get (callee
);
3209 if (isummary
== NULL
)
3212 return isummary
->inlinable
;
3215 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3216 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3217 devirtualized. AVALS, if non-NULL, describes the context of the call site
3218 as far as values of parameters are concerened. */
3221 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3222 sreal
*time
, ipa_call_arg_values
*avals
,
3225 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3226 int call_size
= es
->call_stmt_size
;
3227 int call_time
= es
->call_stmt_time
;
3230 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3231 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3232 *hints
|= INLINE_HINT_indirect_call
;
3233 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3236 *min_size
+= cur_size
;
3238 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3242 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3243 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3246 Helper for estimate_calls_size_and_time which does the same but
3247 (in most cases) faster. */
3250 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3251 int *min_size
, sreal
*time
,
3253 clause_t possible_truths
,
3254 ipa_call_arg_values
*avals
)
3256 struct cgraph_edge
*e
;
3257 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3259 if (!e
->inline_failed
)
3261 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3262 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3263 hints
, possible_truths
, avals
);
3267 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3269 /* Do not care about zero sized builtins. */
3270 if (!es
->call_stmt_size
)
3272 gcc_checking_assert (!es
->call_stmt_time
);
3276 || es
->predicate
->evaluate (possible_truths
))
3278 /* Predicates of calls shall not use NOT_CHANGED codes,
3279 so we do not need to compute probabilities. */
3280 estimate_edge_size_and_time (e
, size
,
3281 es
->predicate
? NULL
: min_size
,
3282 time
, avals
, hints
);
3285 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3287 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3289 || es
->predicate
->evaluate (possible_truths
))
3290 estimate_edge_size_and_time (e
, size
,
3291 es
->predicate
? NULL
: min_size
,
3292 time
, avals
, hints
);
3296 /* Populate sum->call_size_time_table for edges from NODE. */
3299 summarize_calls_size_and_time (struct cgraph_node
*node
,
3300 ipa_fn_summary
*sum
)
3302 struct cgraph_edge
*e
;
3303 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3305 if (!e
->inline_failed
)
3307 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3308 summarize_calls_size_and_time (e
->callee
, sum
);
3314 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3316 struct predicate pred
= true;
3317 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3320 pred
= *es
->predicate
;
3321 sum
->account_size_time (size
, time
, pred
, pred
, true);
3323 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3328 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3329 struct predicate pred
= true;
3330 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3333 pred
= *es
->predicate
;
3334 sum
->account_size_time (size
, time
, pred
, pred
, true);
3338 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3339 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3340 context of the call site. */
3343 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3344 int *min_size
, sreal
*time
,
3346 clause_t possible_truths
,
3347 ipa_call_arg_values
*avals
)
3349 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3350 bool use_table
= true;
3352 gcc_assert (node
->callees
|| node
->indirect_calls
);
3354 /* During early inlining we do not calculate info for very
3355 large functions and thus there is no need for producing
3357 if (!ipa_node_params_sum
)
3359 /* Do not calculate summaries for simple wrappers; it is waste
3361 else if (node
->callees
&& node
->indirect_calls
3362 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3364 /* If there is an indirect edge that may be optimized, we need
3365 to go the slow way. */
3366 else if (avals
&& hints
3367 && (avals
->m_known_vals
.length ()
3368 || avals
->m_known_contexts
.length ()
3369 || avals
->m_known_aggs
.length ()))
3371 class ipa_node_params
*params_summary
= IPA_NODE_REF (node
);
3372 unsigned int nargs
= params_summary
3373 ? ipa_get_param_count (params_summary
) : 0;
3375 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3377 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3378 && (avals
->safe_sval_at (i
)
3379 || (avals
->m_known_aggs
.length () > i
3380 && avals
->m_known_aggs
[i
].items
.length ())))
3382 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3383 && (avals
->m_known_contexts
.length () > i
3384 && !avals
->m_known_contexts
[i
].useless_p ()))
3389 /* Fast path is via the call size time table. */
3392 /* Build summary if it is absent. */
3393 if (!sum
->call_size_time_table
.length ())
3395 predicate true_pred
= true;
3396 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3397 summarize_calls_size_and_time (node
, sum
);
3400 int old_size
= *size
;
3401 sreal old_time
= time
? *time
: 0;
3404 *min_size
+= sum
->call_size_time_table
[0].size
;
3409 /* Walk the table and account sizes and times. */
3410 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3412 if (e
->exec_predicate
.evaluate (possible_truths
))
3419 /* Be careful and see if both methods agree. */
3420 if ((flag_checking
|| dump_file
)
3421 /* Do not try to sanity check when we know we lost some
3423 && sum
->call_size_time_table
.length ()
3424 < ipa_fn_summary::max_size_time_table_size
)
3426 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3427 possible_truths
, avals
);
3428 gcc_assert (*size
== old_size
);
3429 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3431 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3432 old_time
.to_double (),
3433 time
->to_double ());
3436 /* Slow path by walking all edges. */
3438 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3439 possible_truths
, avals
);
3442 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3443 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3446 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3447 clause_t nonspec_possible_truths
,
3448 vec
<inline_param_summary
>
3449 inline_param_summary
,
3450 ipa_auto_call_arg_values
*arg_values
)
3451 : m_node (node
), m_possible_truths (possible_truths
),
3452 m_nonspec_possible_truths (nonspec_possible_truths
),
3453 m_inline_param_summary (inline_param_summary
),
3454 m_avals (arg_values
)
3458 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3461 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3463 m_node
= ctx
.m_node
;
3464 m_possible_truths
= ctx
.m_possible_truths
;
3465 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3466 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3467 unsigned int nargs
= params_summary
3468 ? ipa_get_param_count (params_summary
) : 0;
3470 m_inline_param_summary
= vNULL
;
3471 /* Copy the info only if there is at least one useful entry. */
3472 if (ctx
.m_inline_param_summary
.exists ())
3474 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3476 for (unsigned int i
= 0; i
< n
; i
++)
3477 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3478 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3480 m_inline_param_summary
3481 = ctx
.m_inline_param_summary
.copy ();
3485 m_avals
.m_known_vals
= vNULL
;
3486 if (ctx
.m_avals
.m_known_vals
.exists ())
3488 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3490 for (unsigned int i
= 0; i
< n
; i
++)
3491 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3492 && ctx
.m_avals
.m_known_vals
[i
])
3494 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3499 m_avals
.m_known_contexts
= vNULL
;
3500 if (ctx
.m_avals
.m_known_contexts
.exists ())
3502 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3504 for (unsigned int i
= 0; i
< n
; i
++)
3505 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3506 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3508 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3513 m_avals
.m_known_aggs
= vNULL
;
3514 if (ctx
.m_avals
.m_known_aggs
.exists ())
3516 unsigned int n
= MIN (ctx
.m_avals
.m_known_aggs
.length (), nargs
);
3518 for (unsigned int i
= 0; i
< n
; i
++)
3519 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3520 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3522 m_avals
.m_known_aggs
3523 = ipa_copy_agg_values (ctx
.m_avals
.m_known_aggs
);
3528 m_avals
.m_known_value_ranges
= vNULL
;
3531 /* Release memory used by known_vals/contexts/aggs vectors. and
3532 inline_param_summary. */
3535 ipa_cached_call_context::release ()
3537 /* See if context is initialized at first place. */
3540 ipa_release_agg_values (m_avals
.m_known_aggs
, true);
3541 m_avals
.m_known_vals
.release ();
3542 m_avals
.m_known_contexts
.release ();
3543 m_inline_param_summary
.release ();
3546 /* Return true if CTX describes the same call context as THIS. */
3549 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3551 if (m_node
!= ctx
.m_node
3552 || m_possible_truths
!= ctx
.m_possible_truths
3553 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3556 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3557 unsigned int nargs
= params_summary
3558 ? ipa_get_param_count (params_summary
) : 0;
3560 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3562 for (unsigned int i
= 0; i
< nargs
; i
++)
3564 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3566 if (i
>= m_inline_param_summary
.length ()
3567 || m_inline_param_summary
[i
].useless_p ())
3569 if (i
< ctx
.m_inline_param_summary
.length ()
3570 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3574 if (i
>= ctx
.m_inline_param_summary
.length ()
3575 || ctx
.m_inline_param_summary
[i
].useless_p ())
3577 if (i
< m_inline_param_summary
.length ()
3578 && !m_inline_param_summary
[i
].useless_p ())
3582 if (!m_inline_param_summary
[i
].equal_to
3583 (ctx
.m_inline_param_summary
[i
]))
3587 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3589 for (unsigned int i
= 0; i
< nargs
; i
++)
3591 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3593 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3595 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3596 && ctx
.m_avals
.m_known_vals
[i
])
3600 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3601 || !ctx
.m_avals
.m_known_vals
[i
])
3603 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3607 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3611 if (m_avals
.m_known_contexts
.exists ()
3612 || ctx
.m_avals
.m_known_contexts
.exists ())
3614 for (unsigned int i
= 0; i
< nargs
; i
++)
3616 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3618 if (i
>= m_avals
.m_known_contexts
.length ()
3619 || m_avals
.m_known_contexts
[i
].useless_p ())
3621 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3622 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3626 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3627 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3629 if (i
< m_avals
.m_known_contexts
.length ()
3630 && !m_avals
.m_known_contexts
[i
].useless_p ())
3634 if (!m_avals
.m_known_contexts
[i
].equal_to
3635 (ctx
.m_avals
.m_known_contexts
[i
]))
3639 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3641 for (unsigned int i
= 0; i
< nargs
; i
++)
3643 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3645 if (i
>= m_avals
.m_known_aggs
.length ()
3646 || m_avals
.m_known_aggs
[i
].is_empty ())
3648 if (i
< ctx
.m_avals
.m_known_aggs
.length ()
3649 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3653 if (i
>= ctx
.m_avals
.m_known_aggs
.length ()
3654 || ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3656 if (i
< m_avals
.m_known_aggs
.length ()
3657 && !m_avals
.m_known_aggs
[i
].is_empty ())
3661 if (!m_avals
.m_known_aggs
[i
].equal_to (ctx
.m_avals
.m_known_aggs
[i
]))
3668 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3669 this context. Always compute size and min_size. Only compute time and
3670 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3674 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3675 bool est_times
, bool est_hints
)
3677 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3682 ipa_hints hints
= 0;
3683 sreal loops_with_known_iterations
= 0;
3684 sreal loops_with_known_strides
= 0;
3687 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3690 fprintf (dump_file
, " Estimating body: %s\n"
3691 " Known to be false: ", m_node
->dump_name ());
3693 for (i
= predicate::not_inlined_condition
;
3694 i
< (predicate::first_dynamic_condition
3695 + (int) vec_safe_length (info
->conds
)); i
++)
3696 if (!(m_possible_truths
& (1 << i
)))
3699 fprintf (dump_file
, ", ");
3701 dump_condition (dump_file
, info
->conds
, i
);
3705 if (m_node
->callees
|| m_node
->indirect_calls
)
3706 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3707 est_times
? &time
: NULL
,
3708 est_hints
? &hints
: NULL
, m_possible_truths
,
3711 sreal nonspecialized_time
= time
;
3713 min_size
+= info
->size_time_table
[0].size
;
3714 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3716 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3718 /* Because predicates are conservative, it can happen that nonconst is 1
3722 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3724 gcc_checking_assert (e
->time
>= 0);
3725 gcc_checking_assert (time
>= 0);
3727 /* We compute specialized size only because size of nonspecialized
3728 copy is context independent.
3730 The difference between nonspecialized execution and specialized is
3731 that nonspecialized is not going to have optimized out computations
3732 known to be constant in a specialized setting. */
3737 nonspecialized_time
+= e
->time
;
3740 else if (!m_inline_param_summary
.exists ())
3747 int prob
= e
->nonconst_predicate
.probability
3748 (info
->conds
, m_possible_truths
,
3749 m_inline_param_summary
);
3750 gcc_checking_assert (prob
>= 0);
3751 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3752 if (prob
== REG_BR_PROB_BASE
)
3755 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3757 gcc_checking_assert (time
>= 0);
3760 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3761 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3762 gcc_checking_assert (min_size
>= 0);
3763 gcc_checking_assert (size
>= 0);
3764 gcc_checking_assert (time
>= 0);
3765 /* nonspecialized_time should be always bigger than specialized time.
3766 Roundoff issues however may get into the way. */
3767 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3769 /* Roundoff issues may make specialized time bigger than nonspecialized
3770 time. We do not really want that to happen because some heuristics
3771 may get confused by seeing negative speedups. */
3772 if (time
> nonspecialized_time
)
3773 time
= nonspecialized_time
;
3778 hints
|= INLINE_HINT_in_scc
;
3779 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3780 hints
|= INLINE_HINT_declared_inline
;
3781 if (info
->builtin_constant_p_parms
.length ()
3782 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3783 hints
|= INLINE_HINT_builtin_constant_p
;
3785 ipa_freqcounting_predicate
*fcp
;
3786 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3787 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3789 hints
|= INLINE_HINT_loop_iterations
;
3790 loops_with_known_iterations
+= fcp
->freq
;
3792 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3794 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3795 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3797 hints
|= INLINE_HINT_loop_stride
;
3798 loops_with_known_strides
+= fcp
->freq
;
3800 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3803 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3804 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3808 fprintf (dump_file
, "\n size:%i", (int) size
);
3810 fprintf (dump_file
, " time:%f nonspec time:%f",
3811 time
.to_double (), nonspecialized_time
.to_double ());
3813 fprintf (dump_file
, " loops with known iterations:%f "
3814 "known strides:%f", loops_with_known_iterations
.to_double (),
3815 loops_with_known_strides
.to_double ());
3816 fprintf (dump_file
, "\n");
3820 estimates
->time
= time
;
3821 estimates
->nonspecialized_time
= nonspecialized_time
;
3823 estimates
->size
= size
;
3824 estimates
->min_size
= min_size
;
3826 estimates
->hints
= hints
;
3831 /* Estimate size and time needed to execute callee of EDGE assuming that
3832 parameters known to be constant at caller of EDGE are propagated.
3833 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3834 and types for parameters. */
3837 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3838 ipa_auto_call_arg_values
*avals
,
3839 ipa_call_estimates
*estimates
)
3841 clause_t clause
, nonspec_clause
;
3843 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
3845 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
3846 ctx
.estimate_size_and_time (estimates
);
3849 /* Return stack frame offset where frame of NODE is supposed to start inside
3850 of the function it is inlined to.
3851 Return 0 for functions that are not inlined. */
3854 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3856 HOST_WIDE_INT offset
= 0;
3857 if (!node
->inlined_to
)
3859 node
= node
->callers
->caller
;
3862 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3863 if (!node
->inlined_to
)
3865 node
= node
->callers
->caller
;
3870 /* Update summary information of inline clones after inlining.
3871 Compute peak stack usage. */
3874 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3876 struct cgraph_edge
*e
;
3878 ipa_propagate_frequency (node
);
3879 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3881 if (!e
->inline_failed
)
3882 inline_update_callee_summaries (e
->callee
, depth
);
3884 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3886 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3887 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3890 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3891 INLINED_EDGE has been inlined.
3893 When function A is inlined in B and A calls C with parameter that
3894 changes with probability PROB1 and C is known to be passthrough
3895 of argument if B that change with probability PROB2, the probability
3896 of change is now PROB1*PROB2. */
3899 remap_edge_params (struct cgraph_edge
*inlined_edge
,
3900 struct cgraph_edge
*edge
)
3902 if (ipa_node_params_sum
)
3905 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3908 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3909 class ipa_call_summary
*inlined_es
3910 = ipa_call_summaries
->get (inlined_edge
);
3912 if (es
->param
.length () == 0)
3915 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3917 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3918 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3919 || jfunc
->type
== IPA_JF_ANCESTOR
)
3921 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3922 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3923 : ipa_get_jf_ancestor_formal_id (jfunc
);
3924 if (id
< (int) inlined_es
->param
.length ())
3926 int prob1
= es
->param
[i
].change_prob
;
3927 int prob2
= inlined_es
->param
[id
].change_prob
;
3928 int prob
= combine_probabilities (prob1
, prob2
);
3930 if (prob1
&& prob2
&& !prob
)
3933 es
->param
[i
].change_prob
= prob
;
3936 ->param
[id
].points_to_local_or_readonly_memory
)
3937 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3939 if (!es
->param
[i
].points_to_local_or_readonly_memory
3940 && jfunc
->type
== IPA_JF_CONST
3941 && points_to_local_or_readonly_memory_p
3942 (ipa_get_jf_constant (jfunc
)))
3943 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3949 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3951 Remap predicates of callees of NODE. Rest of arguments match
3954 Also update change probabilities. */
3957 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
3958 struct cgraph_node
*node
,
3959 class ipa_fn_summary
*info
,
3960 class ipa_node_params
*params_summary
,
3961 class ipa_fn_summary
*callee_info
,
3962 vec
<int> operand_map
,
3963 vec
<HOST_WIDE_INT
> offset_map
,
3964 clause_t possible_truths
,
3965 predicate
*toplev_predicate
)
3967 struct cgraph_edge
*e
, *next
;
3968 for (e
= node
->callees
; e
; e
= next
)
3971 next
= e
->next_callee
;
3973 if (e
->inline_failed
)
3975 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3976 remap_edge_params (inlined_edge
, e
);
3980 p
= es
->predicate
->remap_after_inlining
3981 (info
, params_summary
,
3982 callee_info
, operand_map
,
3983 offset_map
, possible_truths
,
3985 edge_set_predicate (e
, &p
);
3988 edge_set_predicate (e
, toplev_predicate
);
3991 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
3992 params_summary
, callee_info
,
3993 operand_map
, offset_map
, possible_truths
,
3996 for (e
= node
->indirect_calls
; e
; e
= next
)
3998 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4000 next
= e
->next_callee
;
4002 remap_edge_params (inlined_edge
, e
);
4005 p
= es
->predicate
->remap_after_inlining
4006 (info
, params_summary
,
4007 callee_info
, operand_map
, offset_map
,
4008 possible_truths
, *toplev_predicate
);
4009 edge_set_predicate (e
, &p
);
4012 edge_set_predicate (e
, toplev_predicate
);
4016 /* Run remap_after_inlining on each predicate in V. */
4019 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4020 class ipa_node_params
*params_summary
,
4021 class ipa_fn_summary
*callee_info
,
4022 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4023 vec
<int> operand_map
,
4024 vec
<HOST_WIDE_INT
> offset_map
,
4025 clause_t possible_truths
,
4026 predicate
*toplev_predicate
)
4029 ipa_freqcounting_predicate
*fcp
;
4030 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4033 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4034 callee_info
, operand_map
,
4035 offset_map
, possible_truths
,
4037 if (p
!= false && p
!= true)
4038 *fcp
->predicate
&= p
;
4042 /* We inlined EDGE. Update summary of the function we inlined into. */
4045 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4047 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4048 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4049 ? edge
->caller
->inlined_to
: edge
->caller
);
4050 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4051 clause_t clause
= 0; /* not_inline is known to be false. */
4053 auto_vec
<int, 8> operand_map
;
4054 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4056 predicate toplev_predicate
;
4057 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4058 class ipa_node_params
*params_summary
= (ipa_node_params_sum
4059 ? IPA_NODE_REF (to
) : NULL
);
4062 toplev_predicate
= *es
->predicate
;
4064 toplev_predicate
= true;
4066 info
->fp_expressions
|= callee_info
->fp_expressions
;
4068 if (callee_info
->conds
)
4070 ipa_auto_call_arg_values avals
;
4071 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4073 if (ipa_node_params_sum
&& callee_info
->conds
)
4075 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
4076 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4081 operand_map
.safe_grow_cleared (count
, true);
4082 offset_map
.safe_grow_cleared (count
, true);
4084 for (i
= 0; i
< count
; i
++)
4086 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4089 /* TODO: handle non-NOPs when merging. */
4090 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4092 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4093 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4094 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4097 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4099 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4100 if (offset
>= 0 && offset
< INT_MAX
)
4102 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4103 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4105 offset_map
[i
] = offset
;
4108 operand_map
[i
] = map
;
4109 gcc_assert (map
< ipa_get_param_count (params_summary
));
4113 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4114 if (ip
< count
&& operand_map
[ip
] >= 0)
4115 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4117 sreal freq
= edge
->sreal_frequency ();
4118 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4121 p
= e
->exec_predicate
.remap_after_inlining
4122 (info
, params_summary
,
4123 callee_info
, operand_map
,
4126 predicate nonconstp
;
4127 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4128 (info
, params_summary
,
4129 callee_info
, operand_map
,
4132 if (p
!= false && nonconstp
!= false)
4134 sreal add_time
= ((sreal
)e
->time
* freq
);
4135 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4137 if (prob
!= REG_BR_PROB_BASE
)
4138 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4139 if (prob
!= REG_BR_PROB_BASE
4140 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4142 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4143 (double) prob
/ REG_BR_PROB_BASE
);
4145 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4148 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4149 callee_info
, operand_map
,
4150 offset_map
, clause
, &toplev_predicate
);
4151 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4152 info
->loop_iterations
, operand_map
,
4153 offset_map
, clause
, &toplev_predicate
);
4154 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4155 info
->loop_strides
, operand_map
,
4156 offset_map
, clause
, &toplev_predicate
);
4158 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4159 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4161 if (info
->estimated_stack_size
< peak
)
4162 info
->estimated_stack_size
= peak
;
4164 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4165 if (info
->call_size_time_table
.length ())
4168 sreal edge_time
= 0;
4170 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4171 /* Unaccount size and time of the optimized out call. */
4172 info
->account_size_time (-edge_size
, -edge_time
,
4173 es
->predicate
? *es
->predicate
: true,
4174 es
->predicate
? *es
->predicate
: true,
4176 /* Account new calls. */
4177 summarize_calls_size_and_time (edge
->callee
, info
);
4180 /* Free summaries that are not maintained for inline clones/edges. */
4181 ipa_call_summaries
->remove (edge
);
4182 ipa_fn_summaries
->remove (edge
->callee
);
4183 ipa_remove_from_growth_caches (edge
);
4186 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4187 overall size and time. Recompute it.
4188 If RESET is true also recompute call_time_size_table. */
4191 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4193 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4194 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4198 size_info
->size
= 0;
4200 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4202 size_info
->size
+= e
->size
;
4203 info
->time
+= e
->time
;
4205 info
->min_size
= info
->size_time_table
[0].size
;
4207 info
->call_size_time_table
.release ();
4208 if (node
->callees
|| node
->indirect_calls
)
4209 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4211 ~(clause_t
) (1 << predicate::false_condition
),
4213 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4214 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4218 /* This function performs intraprocedural analysis in NODE that is required to
4219 inline indirect calls. */
4222 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4224 ipa_analyze_node (node
);
4225 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4227 ipa_print_node_params (dump_file
, node
);
4228 ipa_print_node_jump_functions (dump_file
, node
);
4233 /* Note function body size. */
4236 inline_analyze_function (struct cgraph_node
*node
)
4238 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4241 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4242 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4243 inline_indirect_intraprocedural_analysis (node
);
4244 compute_fn_summary (node
, false);
4247 struct cgraph_edge
*e
;
4248 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4249 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4250 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4251 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4258 /* Called when new function is inserted to callgraph late. */
4261 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4263 inline_analyze_function (node
);
4266 /* Note function body size. */
4269 ipa_fn_summary_generate (void)
4271 struct cgraph_node
*node
;
4273 FOR_EACH_DEFINED_FUNCTION (node
)
4274 if (DECL_STRUCT_FUNCTION (node
->decl
))
4275 node
->versionable
= tree_versionable_function_p (node
->decl
);
4277 ipa_fn_summary_alloc ();
4279 ipa_fn_summaries
->enable_insertion_hook ();
4281 ipa_register_cgraph_hooks ();
4283 FOR_EACH_DEFINED_FUNCTION (node
)
4285 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4286 || opt_for_fn (node
->decl
, optimize
)))
4287 inline_analyze_function (node
);
4291 /* Write inline summary for edge E to OB. */
4294 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4297 class ipa_call_summary
*es
= prevails
4298 ? ipa_call_summaries
->get_create (e
) : NULL
;
4302 int size
= streamer_read_uhwi (ib
);
4303 int time
= streamer_read_uhwi (ib
);
4304 int depth
= streamer_read_uhwi (ib
);
4308 es
->call_stmt_size
= size
;
4309 es
->call_stmt_time
= time
;
4310 es
->loop_depth
= depth
;
4313 bitpack_d bp
= streamer_read_bitpack (ib
);
4315 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4317 bp_unpack_value (&bp
, 1);
4321 edge_set_predicate (e
, &p
);
4322 length
= streamer_read_uhwi (ib
);
4324 && (e
->possibly_call_in_translation_unit_p ()
4325 /* Also stream in jump functions to builtins in hope that they
4326 will get fnspecs. */
4327 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4329 es
->param
.safe_grow_cleared (length
, true);
4330 for (i
= 0; i
< length
; i
++)
4332 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4333 es
->param
[i
].points_to_local_or_readonly_memory
4334 = streamer_read_uhwi (ib
);
4339 for (i
= 0; i
< length
; i
++)
4341 streamer_read_uhwi (ib
);
4342 streamer_read_uhwi (ib
);
4348 /* Stream in inline summaries from the section. */
4351 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4354 const struct lto_function_header
*header
=
4355 (const struct lto_function_header
*) data
;
4356 const int cfg_offset
= sizeof (struct lto_function_header
);
4357 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4358 const int string_offset
= main_offset
+ header
->main_size
;
4359 class data_in
*data_in
;
4360 unsigned int i
, count2
, j
;
4361 unsigned int f_count
;
4363 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4364 file_data
->mode_table
);
4367 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4368 header
->string_size
, vNULL
);
4369 f_count
= streamer_read_uhwi (&ib
);
4370 for (i
= 0; i
< f_count
; i
++)
4373 struct cgraph_node
*node
;
4374 class ipa_fn_summary
*info
;
4375 class ipa_node_params
*params_summary
;
4376 class ipa_size_summary
*size_info
;
4377 lto_symtab_encoder_t encoder
;
4378 struct bitpack_d bp
;
4379 struct cgraph_edge
*e
;
4382 index
= streamer_read_uhwi (&ib
);
4383 encoder
= file_data
->symtab_node_encoder
;
4384 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4386 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4387 params_summary
= node
->prevailing_p () ? IPA_NODE_REF (node
) : NULL
;
4388 size_info
= node
->prevailing_p ()
4389 ? ipa_size_summaries
->get_create (node
) : NULL
;
4391 int stack_size
= streamer_read_uhwi (&ib
);
4392 int size
= streamer_read_uhwi (&ib
);
4393 sreal time
= sreal::stream_in (&ib
);
4397 info
->estimated_stack_size
4398 = size_info
->estimated_self_stack_size
= stack_size
;
4399 size_info
->size
= size_info
->self_size
= size
;
4403 bp
= streamer_read_bitpack (&ib
);
4406 info
->inlinable
= bp_unpack_value (&bp
, 1);
4407 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4411 bp_unpack_value (&bp
, 1);
4412 bp_unpack_value (&bp
, 1);
4415 count2
= streamer_read_uhwi (&ib
);
4416 gcc_assert (!info
|| !info
->conds
);
4418 vec_safe_reserve_exact (info
->conds
, count2
);
4419 for (j
= 0; j
< count2
; j
++)
4422 unsigned int k
, count3
;
4423 c
.operand_num
= streamer_read_uhwi (&ib
);
4424 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4425 c
.type
= stream_read_tree (&ib
, data_in
);
4426 c
.val
= stream_read_tree (&ib
, data_in
);
4427 bp
= streamer_read_bitpack (&ib
);
4428 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4429 c
.by_ref
= bp_unpack_value (&bp
, 1);
4431 c
.offset
= streamer_read_uhwi (&ib
);
4432 count3
= streamer_read_uhwi (&ib
);
4435 vec_safe_reserve_exact (c
.param_ops
, count3
);
4437 ipa_set_param_used_by_ipa_predicates
4438 (params_summary
, c
.operand_num
, true);
4439 for (k
= 0; k
< count3
; k
++)
4441 struct expr_eval_op op
;
4442 enum gimple_rhs_class rhs_class
;
4443 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4444 op
.type
= stream_read_tree (&ib
, data_in
);
4445 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4447 case GIMPLE_UNARY_RHS
:
4449 op
.val
[0] = NULL_TREE
;
4450 op
.val
[1] = NULL_TREE
;
4453 case GIMPLE_BINARY_RHS
:
4454 case GIMPLE_TERNARY_RHS
:
4455 bp
= streamer_read_bitpack (&ib
);
4456 op
.index
= bp_unpack_value (&bp
, 2);
4457 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4458 if (rhs_class
== GIMPLE_BINARY_RHS
)
4459 op
.val
[1] = NULL_TREE
;
4461 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4465 fatal_error (UNKNOWN_LOCATION
,
4466 "invalid fnsummary in LTO stream");
4469 c
.param_ops
->quick_push (op
);
4472 info
->conds
->quick_push (c
);
4474 count2
= streamer_read_uhwi (&ib
);
4475 gcc_assert (!info
|| !info
->size_time_table
.length ());
4477 info
->size_time_table
.reserve_exact (count2
);
4478 for (j
= 0; j
< count2
; j
++)
4480 class size_time_entry e
;
4482 e
.size
= streamer_read_uhwi (&ib
);
4483 e
.time
= sreal::stream_in (&ib
);
4484 e
.exec_predicate
.stream_in (&ib
);
4485 e
.nonconst_predicate
.stream_in (&ib
);
4488 info
->size_time_table
.quick_push (e
);
4491 count2
= streamer_read_uhwi (&ib
);
4492 for (j
= 0; j
< count2
; j
++)
4495 sreal fcp_freq
= sreal::stream_in (&ib
);
4498 ipa_freqcounting_predicate fcp
;
4499 fcp
.predicate
= NULL
;
4500 set_hint_predicate (&fcp
.predicate
, p
);
4501 fcp
.freq
= fcp_freq
;
4502 vec_safe_push (info
->loop_iterations
, fcp
);
4505 count2
= streamer_read_uhwi (&ib
);
4506 for (j
= 0; j
< count2
; j
++)
4509 sreal fcp_freq
= sreal::stream_in (&ib
);
4512 ipa_freqcounting_predicate fcp
;
4513 fcp
.predicate
= NULL
;
4514 set_hint_predicate (&fcp
.predicate
, p
);
4515 fcp
.freq
= fcp_freq
;
4516 vec_safe_push (info
->loop_strides
, fcp
);
4519 count2
= streamer_read_uhwi (&ib
);
4521 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4522 for (j
= 0; j
< count2
; j
++)
4524 int parm
= streamer_read_uhwi (&ib
);
4526 info
->builtin_constant_p_parms
.quick_push (parm
);
4528 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4529 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4530 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4531 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4534 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4536 lto_data_in_delete (data_in
);
4540 /* Read inline summary. Jump functions are shared among ipa-cp
4541 and inliner, so when ipa-cp is active, we don't need to write them
4545 ipa_fn_summary_read (void)
4547 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4548 struct lto_file_decl_data
*file_data
;
4551 ipa_prop_read_jump_functions ();
4552 ipa_fn_summary_alloc ();
4554 while ((file_data
= file_data_vec
[j
++]))
4558 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4561 inline_read_section (file_data
, data
, len
);
4563 /* Fatal error here. We do not want to support compiling ltrans units
4564 with different version of compiler or different flags than the WPA
4565 unit, so this should never happen. */
4566 fatal_error (input_location
,
4567 "ipa inline summary is missing in input file");
4569 ipa_register_cgraph_hooks ();
4571 gcc_assert (ipa_fn_summaries
);
4572 ipa_fn_summaries
->enable_insertion_hook ();
4576 /* Write inline summary for edge E to OB. */
4579 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4581 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4584 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4585 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4586 streamer_write_uhwi (ob
, es
->loop_depth
);
4588 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4589 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4590 streamer_write_bitpack (&bp
);
4593 es
->predicate
->stream_out (ob
);
4595 streamer_write_uhwi (ob
, 0);
4596 streamer_write_uhwi (ob
, es
->param
.length ());
4597 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4599 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4600 streamer_write_uhwi (ob
, es
->param
[i
].points_to_local_or_readonly_memory
);
4605 /* Write inline summary for node in SET.
4606 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4607 active, we don't need to write them twice. */
4610 ipa_fn_summary_write (void)
4612 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4613 lto_symtab_encoder_iterator lsei
;
4614 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4615 unsigned int count
= 0;
4617 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4618 lsei_next_function_in_partition (&lsei
))
4620 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4621 if (cnode
->definition
&& !cnode
->alias
)
4624 streamer_write_uhwi (ob
, count
);
4626 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4627 lsei_next_function_in_partition (&lsei
))
4629 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4630 if (cnode
->definition
&& !cnode
->alias
)
4632 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4633 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4634 struct bitpack_d bp
;
4635 struct cgraph_edge
*edge
;
4638 struct condition
*c
;
4640 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4641 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4642 streamer_write_hwi (ob
, size_info
->self_size
);
4643 info
->time
.stream_out (ob
);
4644 bp
= bitpack_create (ob
->main_stream
);
4645 bp_pack_value (&bp
, info
->inlinable
, 1);
4646 bp_pack_value (&bp
, false, 1);
4647 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4648 streamer_write_bitpack (&bp
);
4649 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4650 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4653 struct expr_eval_op
*op
;
4655 streamer_write_uhwi (ob
, c
->operand_num
);
4656 streamer_write_uhwi (ob
, c
->code
);
4657 stream_write_tree (ob
, c
->type
, true);
4658 stream_write_tree (ob
, c
->val
, true);
4659 bp
= bitpack_create (ob
->main_stream
);
4660 bp_pack_value (&bp
, c
->agg_contents
, 1);
4661 bp_pack_value (&bp
, c
->by_ref
, 1);
4662 streamer_write_bitpack (&bp
);
4663 if (c
->agg_contents
)
4664 streamer_write_uhwi (ob
, c
->offset
);
4665 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4666 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4668 streamer_write_uhwi (ob
, op
->code
);
4669 stream_write_tree (ob
, op
->type
, true);
4672 bp
= bitpack_create (ob
->main_stream
);
4673 bp_pack_value (&bp
, op
->index
, 2);
4674 streamer_write_bitpack (&bp
);
4675 stream_write_tree (ob
, op
->val
[0], true);
4677 stream_write_tree (ob
, op
->val
[1], true);
4681 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4682 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4684 streamer_write_uhwi (ob
, e
->size
);
4685 e
->time
.stream_out (ob
);
4686 e
->exec_predicate
.stream_out (ob
);
4687 e
->nonconst_predicate
.stream_out (ob
);
4689 ipa_freqcounting_predicate
*fcp
;
4690 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4691 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4693 fcp
->predicate
->stream_out (ob
);
4694 fcp
->freq
.stream_out (ob
);
4696 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4697 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4699 fcp
->predicate
->stream_out (ob
);
4700 fcp
->freq
.stream_out (ob
);
4702 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4704 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4706 streamer_write_uhwi (ob
, ip
);
4707 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4708 write_ipa_call_summary (ob
, edge
);
4709 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4710 write_ipa_call_summary (ob
, edge
);
4713 streamer_write_char_stream (ob
->main_stream
, 0);
4714 produce_asm (ob
, NULL
);
4715 destroy_output_block (ob
);
4717 ipa_prop_write_jump_functions ();
4721 /* Release function summary. */
4724 ipa_free_fn_summary (void)
4726 if (!ipa_call_summaries
)
4728 ggc_delete (ipa_fn_summaries
);
4729 ipa_fn_summaries
= NULL
;
4730 delete ipa_call_summaries
;
4731 ipa_call_summaries
= NULL
;
4732 edge_predicate_pool
.release ();
4733 /* During IPA this is one of largest datastructures to release. */
4738 /* Release function summary. */
4741 ipa_free_size_summary (void)
4743 if (!ipa_size_summaries
)
4745 delete ipa_size_summaries
;
4746 ipa_size_summaries
= NULL
;
4751 const pass_data pass_data_local_fn_summary
=
4753 GIMPLE_PASS
, /* type */
4754 "local-fnsummary", /* name */
4755 OPTGROUP_INLINE
, /* optinfo_flags */
4756 TV_INLINE_PARAMETERS
, /* tv_id */
4757 0, /* properties_required */
4758 0, /* properties_provided */
4759 0, /* properties_destroyed */
4760 0, /* todo_flags_start */
4761 0, /* todo_flags_finish */
4764 class pass_local_fn_summary
: public gimple_opt_pass
4767 pass_local_fn_summary (gcc::context
*ctxt
)
4768 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4771 /* opt_pass methods: */
4772 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
4773 virtual unsigned int execute (function
*)
4775 return compute_fn_summary_for_current ();
4778 }; // class pass_local_fn_summary
4783 make_pass_local_fn_summary (gcc::context
*ctxt
)
4785 return new pass_local_fn_summary (ctxt
);
4789 /* Free inline summary. */
4793 const pass_data pass_data_ipa_free_fn_summary
=
4795 SIMPLE_IPA_PASS
, /* type */
4796 "free-fnsummary", /* name */
4797 OPTGROUP_NONE
, /* optinfo_flags */
4798 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4799 0, /* properties_required */
4800 0, /* properties_provided */
4801 0, /* properties_destroyed */
4802 0, /* todo_flags_start */
4803 0, /* todo_flags_finish */
4806 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
4809 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4810 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
4814 /* opt_pass methods: */
4815 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
4816 void set_pass_param (unsigned int n
, bool param
)
4818 gcc_assert (n
== 0);
4821 virtual bool gate (function
*) { return true; }
4822 virtual unsigned int execute (function
*)
4824 ipa_free_fn_summary ();
4825 /* Free ipa-prop structures if they are no longer needed. */
4826 ipa_free_all_structures_after_iinln ();
4828 ipa_free_size_summary ();
4834 }; // class pass_ipa_free_fn_summary
4838 simple_ipa_opt_pass
*
4839 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4841 return new pass_ipa_free_fn_summary (ctxt
);
4846 const pass_data pass_data_ipa_fn_summary
=
4848 IPA_PASS
, /* type */
4849 "fnsummary", /* name */
4850 OPTGROUP_INLINE
, /* optinfo_flags */
4851 TV_IPA_FNSUMMARY
, /* tv_id */
4852 0, /* properties_required */
4853 0, /* properties_provided */
4854 0, /* properties_destroyed */
4855 0, /* todo_flags_start */
4856 ( TODO_dump_symtab
), /* todo_flags_finish */
4859 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4862 pass_ipa_fn_summary (gcc::context
*ctxt
)
4863 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4864 ipa_fn_summary_generate
, /* generate_summary */
4865 ipa_fn_summary_write
, /* write_summary */
4866 ipa_fn_summary_read
, /* read_summary */
4867 NULL
, /* write_optimization_summary */
4868 NULL
, /* read_optimization_summary */
4869 NULL
, /* stmt_fixup */
4870 0, /* function_transform_todo_flags_start */
4871 NULL
, /* function_transform */
4872 NULL
) /* variable_transform */
4875 /* opt_pass methods: */
4876 virtual unsigned int execute (function
*) { return 0; }
4878 }; // class pass_ipa_fn_summary
4883 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4885 return new pass_ipa_fn_summary (ctxt
);
4888 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4889 within the same process. For use by toplev::finalize. */
4892 ipa_fnsummary_c_finalize (void)
4894 ipa_free_fn_summary ();