1 /* Function summary pass.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
64 #include "tree-streamer.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
86 #include "tree-into-ssa.h"
87 #include "symtab-clones.h"
90 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
91 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
92 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
94 /* Edge predicates goes here. */
95 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
100 ipa_dump_hints (FILE *f
, ipa_hints hints
)
104 fprintf (f
, "IPA hints:");
105 if (hints
& INLINE_HINT_indirect_call
)
107 hints
&= ~INLINE_HINT_indirect_call
;
108 fprintf (f
, " indirect_call");
110 if (hints
& INLINE_HINT_loop_iterations
)
112 hints
&= ~INLINE_HINT_loop_iterations
;
113 fprintf (f
, " loop_iterations");
115 if (hints
& INLINE_HINT_loop_stride
)
117 hints
&= ~INLINE_HINT_loop_stride
;
118 fprintf (f
, " loop_stride");
120 if (hints
& INLINE_HINT_same_scc
)
122 hints
&= ~INLINE_HINT_same_scc
;
123 fprintf (f
, " same_scc");
125 if (hints
& INLINE_HINT_in_scc
)
127 hints
&= ~INLINE_HINT_in_scc
;
128 fprintf (f
, " in_scc");
130 if (hints
& INLINE_HINT_cross_module
)
132 hints
&= ~INLINE_HINT_cross_module
;
133 fprintf (f
, " cross_module");
135 if (hints
& INLINE_HINT_declared_inline
)
137 hints
&= ~INLINE_HINT_declared_inline
;
138 fprintf (f
, " declared_inline");
140 if (hints
& INLINE_HINT_known_hot
)
142 hints
&= ~INLINE_HINT_known_hot
;
143 fprintf (f
, " known_hot");
145 if (hints
& INLINE_HINT_builtin_constant_p
)
147 hints
&= ~INLINE_HINT_builtin_constant_p
;
148 fprintf (f
, " builtin_constant_p");
154 /* Record SIZE and TIME to SUMMARY.
155 The accounted code will be executed when EXEC_PRED is true.
156 When NONCONST_PRED is false the code will evaluate to constant and
157 will get optimized out in specialized clones of the function.
158 If CALL is true account to call_size_time_table rather than
162 ipa_fn_summary::account_size_time (int size
, sreal time
,
163 const predicate
&exec_pred
,
164 const predicate
&nonconst_pred_in
,
170 predicate nonconst_pred
;
171 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
173 if (exec_pred
== false)
176 nonconst_pred
= nonconst_pred_in
& exec_pred
;
178 if (nonconst_pred
== false)
181 /* We need to create initial empty unconditional clause, but otherwise
182 we don't need to account empty times and sizes. */
183 if (!size
&& time
== 0 && table
->length ())
186 /* Only for calls we are unaccounting what we previously recorded. */
187 gcc_checking_assert (time
>= 0 || call
);
189 for (i
= 0; table
->iterate (i
, &e
); i
++)
190 if (e
->exec_predicate
== exec_pred
191 && e
->nonconst_predicate
== nonconst_pred
)
196 if (i
== max_size_time_table_size
)
201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
203 "\t\tReached limit on number of entries, "
204 "ignoring the predicate.");
206 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
209 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
210 ((double) size
) / ipa_fn_summary::size_scale
,
211 (time
.to_double ()), found
? "" : "new ");
212 exec_pred
.dump (dump_file
, conds
, 0);
213 if (exec_pred
!= nonconst_pred
)
215 fprintf (dump_file
, " nonconst:");
216 nonconst_pred
.dump (dump_file
, conds
);
219 fprintf (dump_file
, "\n");
223 class size_time_entry new_entry
;
224 new_entry
.size
= size
;
225 new_entry
.time
= time
;
226 new_entry
.exec_predicate
= exec_pred
;
227 new_entry
.nonconst_predicate
= nonconst_pred
;
229 call_size_time_table
.safe_push (new_entry
);
231 size_time_table
.safe_push (new_entry
);
237 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
238 /* Tolerate small roundoff issues. */
244 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
246 static struct cgraph_edge
*
247 redirect_to_unreachable (struct cgraph_edge
*e
)
249 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
250 struct cgraph_node
*target
= cgraph_node::get_create
251 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
254 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
256 e
= cgraph_edge::make_direct (e
, target
);
258 e
->redirect_callee (target
);
259 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
260 e
->inline_failed
= CIF_UNREACHABLE
;
261 e
->count
= profile_count::zero ();
262 es
->call_stmt_size
= 0;
263 es
->call_stmt_time
= 0;
265 callee
->remove_symbol_and_inline_clones ();
269 /* Set predicate for edge E. */
272 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
274 /* If the edge is determined to be never executed, redirect it
275 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
277 if (predicate
&& *predicate
== false
278 /* When handling speculative edges, we need to do the redirection
279 just once. Do it always on the direct edge, so we do not
280 attempt to resolve speculation while duplicating the edge. */
281 && (!e
->speculative
|| e
->callee
))
282 e
= redirect_to_unreachable (e
);
284 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
285 if (predicate
&& *predicate
!= true)
288 es
->predicate
= edge_predicate_pool
.allocate ();
289 *es
->predicate
= *predicate
;
294 edge_predicate_pool
.remove (es
->predicate
);
295 es
->predicate
= NULL
;
299 /* Set predicate for hint *P. */
302 set_hint_predicate (predicate
**p
, predicate new_predicate
)
304 if (new_predicate
== false || new_predicate
== true)
307 edge_predicate_pool
.remove (*p
);
313 *p
= edge_predicate_pool
.allocate ();
318 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
319 Otherwise add a new item to the vector with this predicate and freq equal
320 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
321 in which case the function does nothing. */
324 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
325 const predicate
&new_predicate
, sreal add_freq
,
326 unsigned max_num_predicates
)
328 if (new_predicate
== false || new_predicate
== true)
330 ipa_freqcounting_predicate
*f
;
331 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
332 if (new_predicate
== f
->predicate
)
337 if (vec_safe_length (*v
) >= max_num_predicates
)
338 /* Too many different predicates to account for. */
341 ipa_freqcounting_predicate fcp
;
342 fcp
.predicate
= NULL
;
343 set_hint_predicate (&fcp
.predicate
, new_predicate
);
345 vec_safe_push (*v
, fcp
);
349 /* Compute what conditions may or may not hold given information about
350 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
351 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
352 copy when called in a given context. It is a bitmask of conditions. Bit
353 0 means that condition is known to be false, while bit 1 means that condition
354 may or may not be true. These differ - for example NOT_INLINED condition
355 is always false in the second and also builtin_constant_p tests cannot use
356 the fact that parameter is indeed a constant.
358 When INLINE_P is true, assume that we are inlining. AVAL contains known
359 information about argument values. The function does not modify its content
360 and so AVALs could also be of type ipa_call_arg_values but so far all
361 callers work with the auto version and so we avoid the conversion for
364 ERROR_MARK value of an argument means compile time invariant. */
367 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
369 ipa_auto_call_arg_values
*avals
,
370 clause_t
*ret_clause
,
371 clause_t
*ret_nonspec_clause
)
373 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
374 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
375 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
379 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
384 struct expr_eval_op
*op
;
386 /* We allow call stmt to have fewer arguments than the callee function
387 (especially for K&R style programs). So bound check here (we assume
388 m_known_aggs vector is either empty or has the same length as
390 gcc_checking_assert (!avals
->m_known_aggs
.length ()
391 || !avals
->m_known_vals
.length ()
392 || (avals
->m_known_vals
.length ()
393 == avals
->m_known_aggs
.length ()));
397 if (c
->code
== predicate::changed
399 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
402 if (ipa_agg_value_set
*agg
= avals
->safe_aggval_at (c
->operand_num
))
404 tree sval
= avals
->safe_sval_at (c
->operand_num
);
405 val
= ipa_find_agg_cst_for_param (agg
, sval
, c
->offset
,
413 val
= avals
->safe_sval_at (c
->operand_num
);
414 if (val
&& val
== error_mark_node
&& c
->code
!= predicate::changed
)
419 && (c
->code
== predicate::changed
420 || c
->code
== predicate::is_not_constant
))
422 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
423 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
426 if (c
->code
== predicate::changed
)
428 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
432 if (c
->code
== predicate::is_not_constant
)
434 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
438 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
440 if (c
->type
!= TREE_TYPE (val
))
441 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
442 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
447 val
= fold_unary (op
->code
, op
->type
, val
);
448 else if (!op
->val
[1])
449 val
= fold_binary (op
->code
, op
->type
,
450 op
->index
? op
->val
[0] : val
,
451 op
->index
? val
: op
->val
[0]);
452 else if (op
->index
== 0)
453 val
= fold_ternary (op
->code
, op
->type
,
454 val
, op
->val
[0], op
->val
[1]);
455 else if (op
->index
== 1)
456 val
= fold_ternary (op
->code
, op
->type
,
457 op
->val
[0], val
, op
->val
[1]);
458 else if (op
->index
== 2)
459 val
= fold_ternary (op
->code
, op
->type
,
460 op
->val
[0], op
->val
[1], val
);
466 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
469 if (res
&& integer_zerop (res
))
471 if (res
&& integer_onep (res
))
473 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
474 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
478 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
480 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
482 value_range vr
= avals
->m_known_value_ranges
[c
->operand_num
];
483 if (!vr
.undefined_p ()
485 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
487 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
490 range_fold_unary_expr (&res
, NOP_EXPR
,
491 c
->type
, &vr
, vr
.type ());
496 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
498 if (vr
.varying_p () || vr
.undefined_p ())
503 range_fold_unary_expr (&res
, op
->code
, op
->type
, &vr
, type
);
504 else if (!op
->val
[1])
506 value_range
op0 (op
->val
[0], op
->val
[0]);
507 range_fold_binary_expr (&res
, op
->code
, op
->type
,
508 op
->index
? &op0
: &vr
,
509 op
->index
? &vr
: &op0
);
516 if (!vr
.varying_p () && !vr
.undefined_p ())
519 value_range
val_vr (c
->val
, c
->val
);
520 range_fold_binary_expr (&res
, c
->code
, boolean_type_node
,
529 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
530 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
532 *ret_clause
= clause
;
533 if (ret_nonspec_clause
)
534 *ret_nonspec_clause
= nonspec_clause
;
537 /* Return true if VRP will be executed on the function.
538 We do not want to anticipate optimizations that will not happen.
540 FIXME: This can be confused with -fdisable and debug counters and thus
541 it should not be used for correctness (only to make heuristics work).
542 This means that inliner should do its own optimizations of expressions
543 that it predicts to be constant so wrong code can not be triggered by
544 builtin_constant_p. */
547 vrp_will_run_p (struct cgraph_node
*node
)
549 return (opt_for_fn (node
->decl
, optimize
)
550 && !opt_for_fn (node
->decl
, optimize_debug
)
551 && opt_for_fn (node
->decl
, flag_tree_vrp
));
554 /* Similarly about FRE. */
557 fre_will_run_p (struct cgraph_node
*node
)
559 return (opt_for_fn (node
->decl
, optimize
)
560 && !opt_for_fn (node
->decl
, optimize_debug
)
561 && opt_for_fn (node
->decl
, flag_tree_fre
));
564 /* Work out what conditions might be true at invocation of E.
565 Compute costs for inlined edge if INLINE_P is true.
567 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
568 (if non-NULL) conditions evaluated for nonspecialized clone called
571 Vectors in AVALS will be populated with useful known information about
572 argument values - information not known to have any uses will be omitted -
573 except for m_known_contexts which will only be calculated if
574 COMPUTE_CONTEXTS is true. */
577 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
578 clause_t
*clause_ptr
,
579 clause_t
*nonspec_clause_ptr
,
580 ipa_auto_call_arg_values
*avals
,
581 bool compute_contexts
)
583 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
584 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
585 class ipa_edge_args
*args
;
588 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
590 if (ipa_node_params_sum
591 && !e
->call_stmt_cannot_inline_p
592 && (info
->conds
|| compute_contexts
)
593 && (args
= IPA_EDGE_REF (e
)) != NULL
)
595 struct cgraph_node
*caller
;
596 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
597 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
598 int i
, count
= ipa_get_cs_argument_count (args
);
602 if (e
->caller
->inlined_to
)
603 caller
= e
->caller
->inlined_to
;
606 caller_parms_info
= IPA_NODE_REF (caller
);
607 callee_pi
= IPA_NODE_REF (callee
);
609 /* Watch for thunks. */
611 /* Watch for variadic functions. */
612 count
= MIN (count
, ipa_get_param_count (callee_pi
));
616 for (i
= 0; i
< count
; i
++)
618 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
620 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
621 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
623 /* Determine if we know constant value of the parameter. */
624 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
625 ipa_get_type (callee_pi
, i
));
627 if (!cst
&& e
->call_stmt
628 && i
< (int)gimple_call_num_args (e
->call_stmt
))
630 cst
= gimple_call_arg (e
->call_stmt
, i
);
631 if (!is_gimple_min_invariant (cst
))
636 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
637 if (!avals
->m_known_vals
.length ())
638 avals
->m_known_vals
.safe_grow_cleared (count
, true);
639 avals
->m_known_vals
[i
] = cst
;
641 else if (inline_p
&& !es
->param
[i
].change_prob
)
643 if (!avals
->m_known_vals
.length ())
644 avals
->m_known_vals
.safe_grow_cleared (count
, true);
645 avals
->m_known_vals
[i
] = error_mark_node
;
648 /* If we failed to get simple constant, try value range. */
649 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
650 && vrp_will_run_p (caller
)
651 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
654 = ipa_value_range_from_jfunc (caller_parms_info
, e
, jf
,
655 ipa_get_type (callee_pi
,
657 if (!vr
.undefined_p () && !vr
.varying_p ())
659 if (!avals
->m_known_value_ranges
.length ())
661 avals
->m_known_value_ranges
.safe_grow (count
, true);
662 for (int i
= 0; i
< count
; ++i
)
663 new (&avals
->m_known_value_ranges
[i
])
666 avals
->m_known_value_ranges
[i
] = vr
;
670 /* Determine known aggregate values. */
671 if (fre_will_run_p (caller
))
673 ipa_agg_value_set agg
674 = ipa_agg_value_set_from_jfunc (caller_parms_info
,
676 if (agg
.items
.length ())
678 if (!avals
->m_known_aggs
.length ())
679 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
680 avals
->m_known_aggs
[i
] = agg
;
685 /* For calls used in polymorphic calls we further determine
686 polymorphic call context. */
688 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
690 ipa_polymorphic_call_context
691 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
692 if (!ctx
.useless_p ())
694 if (!avals
->m_known_contexts
.length ())
695 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
696 avals
->m_known_contexts
[i
]
697 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
702 gcc_assert (!count
|| callee
->thunk
);
704 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
706 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
708 for (i
= 0; i
< count
; i
++)
710 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
711 if (!is_gimple_min_invariant (cst
))
715 if (!avals
->m_known_vals
.length ())
716 avals
->m_known_vals
.safe_grow_cleared (count
, true);
717 avals
->m_known_vals
[i
] = cst
;
722 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
727 /* Allocate the function summary. */
730 ipa_fn_summary_alloc (void)
732 gcc_checking_assert (!ipa_fn_summaries
);
733 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
734 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
735 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
738 ipa_call_summary::~ipa_call_summary ()
741 edge_predicate_pool
.remove (predicate
);
746 ipa_fn_summary::~ipa_fn_summary ()
748 unsigned len
= vec_safe_length (loop_iterations
);
749 for (unsigned i
= 0; i
< len
; i
++)
750 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
751 len
= vec_safe_length (loop_strides
);
752 for (unsigned i
= 0; i
< len
; i
++)
753 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
755 call_size_time_table
.release ();
756 vec_free (loop_iterations
);
757 vec_free (loop_strides
);
758 builtin_constant_p_parms
.release ();
762 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
765 for (e
= node
->callees
; e
; e
= e
->next_callee
)
766 ipa_call_summaries
->remove (e
);
767 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
768 ipa_call_summaries
->remove (e
);
771 /* Duplicate predicates in loop hint vector, allocating memory for them and
772 remove and deallocate any uninteresting (true or false) ones. Return the
775 static vec
<ipa_freqcounting_predicate
, va_gc
> *
776 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
777 clause_t possible_truths
)
779 if (vec_safe_length (v
) == 0)
782 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
783 int len
= res
->length();
784 for (int i
= len
- 1; i
>= 0; i
--)
786 predicate new_predicate
787 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
788 /* We do not want to free previous predicate; it is used by node
790 (*res
)[i
].predicate
= NULL
;
791 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
793 if (!(*res
)[i
].predicate
)
794 res
->unordered_remove (i
);
801 /* Hook that is called by cgraph.c when a node is duplicated. */
803 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
805 ipa_fn_summary
*src_info
,
806 ipa_fn_summary
*info
)
808 new (info
) ipa_fn_summary (*src_info
);
809 /* TODO: as an optimization, we may avoid copying conditions
810 that are known to be false or true. */
811 info
->conds
= vec_safe_copy (info
->conds
);
813 clone_info
*cinfo
= clone_info::get (dst
);
814 /* When there are any replacements in the function body, see if we can figure
815 out that something was optimized out. */
816 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
818 /* Use SRC parm info since it may not be copied yet. */
819 class ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
820 ipa_auto_call_arg_values avals
;
821 int count
= ipa_get_param_count (parms_info
);
823 clause_t possible_truths
;
824 predicate true_pred
= true;
826 int optimized_out_size
= 0;
827 bool inlined_to_p
= false;
828 struct cgraph_edge
*edge
, *next
;
830 info
->size_time_table
.release ();
831 avals
.m_known_vals
.safe_grow_cleared (count
, true);
832 for (i
= 0; i
< count
; i
++)
834 struct ipa_replace_map
*r
;
836 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
838 if (r
->parm_num
== i
)
840 avals
.m_known_vals
[i
] = r
->new_tree
;
845 evaluate_conditions_for_known_args (dst
, false,
848 /* We are going to specialize,
849 so ignore nonspec truths. */
852 info
->account_size_time (0, 0, true_pred
, true_pred
);
854 /* Remap size_time vectors.
855 Simplify the predicate by pruning out alternatives that are known
857 TODO: as on optimization, we can also eliminate conditions known
859 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
861 predicate new_exec_pred
;
862 predicate new_nonconst_pred
;
863 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
865 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
867 if (new_exec_pred
== false || new_nonconst_pred
== false)
868 optimized_out_size
+= e
->size
;
870 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
874 /* Remap edge predicates with the same simplification as above.
875 Also copy constantness arrays. */
876 for (edge
= dst
->callees
; edge
; edge
= next
)
878 predicate new_predicate
;
879 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
880 next
= edge
->next_callee
;
882 if (!edge
->inline_failed
)
886 new_predicate
= es
->predicate
->remap_after_duplication
888 if (new_predicate
== false && *es
->predicate
!= false)
889 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
890 edge_set_predicate (edge
, &new_predicate
);
893 /* Remap indirect edge predicates with the same simplification as above.
894 Also copy constantness arrays. */
895 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
897 predicate new_predicate
;
898 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
899 next
= edge
->next_callee
;
901 gcc_checking_assert (edge
->inline_failed
);
904 new_predicate
= es
->predicate
->remap_after_duplication
906 if (new_predicate
== false && *es
->predicate
!= false)
908 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
909 edge_set_predicate (edge
, &new_predicate
);
911 info
->loop_iterations
912 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
915 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
917 if (info
->builtin_constant_p_parms
.length())
919 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
921 info
->builtin_constant_p_parms
= vNULL
;
922 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
923 if (!avals
.m_known_vals
[ip
])
924 info
->builtin_constant_p_parms
.safe_push (ip
);
927 /* If inliner or someone after inliner will ever start producing
928 non-trivial clones, we will get trouble with lack of information
929 about updating self sizes, because size vectors already contains
930 sizes of the callees. */
931 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
935 info
->size_time_table
= src_info
->size_time_table
.copy ();
936 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
937 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
939 info
->builtin_constant_p_parms
940 = info
->builtin_constant_p_parms
.copy ();
942 ipa_freqcounting_predicate
*f
;
943 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
945 predicate p
= *f
->predicate
;
947 set_hint_predicate (&f
->predicate
, p
);
949 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
951 predicate p
= *f
->predicate
;
953 set_hint_predicate (&f
->predicate
, p
);
956 if (!dst
->inlined_to
)
957 ipa_update_overall_fn_summary (dst
);
961 /* Hook that is called by cgraph.c when a node is duplicated. */
964 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
965 struct cgraph_edge
*dst
,
966 class ipa_call_summary
*srcinfo
,
967 class ipa_call_summary
*info
)
969 new (info
) ipa_call_summary (*srcinfo
);
970 info
->predicate
= NULL
;
971 edge_set_predicate (dst
, srcinfo
->predicate
);
972 info
->param
= srcinfo
->param
.copy ();
973 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
975 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
976 - eni_size_weights
.call_cost
);
977 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
978 - eni_time_weights
.call_cost
);
982 /* Dump edge summaries associated to NODE and recursively to all clones.
986 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
987 class ipa_fn_summary
*info
)
989 struct cgraph_edge
*edge
;
990 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
992 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
993 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
997 "%*s%s %s\n%*s freq:%4.2f",
998 indent
, "", callee
->dump_name (),
1000 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1001 indent
, "", edge
->sreal_frequency ().to_double ());
1003 if (cross_module_call_p (edge
))
1004 fprintf (f
, " cross module");
1007 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1008 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1010 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1011 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1013 fprintf (f
, " callee size:%2i stack:%2i",
1014 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1015 (int) s
->estimated_stack_size
);
1017 if (es
&& es
->predicate
)
1019 fprintf (f
, " predicate: ");
1020 es
->predicate
->dump (f
, info
->conds
);
1024 if (es
&& es
->param
.exists ())
1025 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1027 int prob
= es
->param
[i
].change_prob
;
1030 fprintf (f
, "%*s op%i is compile time invariant\n",
1032 else if (prob
!= REG_BR_PROB_BASE
)
1033 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1034 prob
* 100.0 / REG_BR_PROB_BASE
);
1035 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1036 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1039 if (!edge
->inline_failed
)
1041 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1042 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1044 (int) ipa_get_stack_frame_offset (callee
),
1045 (int) ss
->estimated_self_stack_size
);
1046 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1049 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1051 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1052 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1056 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1057 es
->call_stmt_time
);
1060 fprintf (f
, "predicate: ");
1061 es
->predicate
->dump (f
, info
->conds
);
1070 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1072 if (node
->definition
)
1074 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1075 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1080 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1081 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1082 fprintf (f
, " always_inline");
1084 fprintf (f
, " inlinable");
1085 if (s
->fp_expressions
)
1086 fprintf (f
, " fp_expression");
1087 if (s
->builtin_constant_p_parms
.length ())
1089 fprintf (f
, " builtin_constant_p_parms");
1090 for (unsigned int i
= 0;
1091 i
< s
->builtin_constant_p_parms
.length (); i
++)
1092 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1094 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1095 fprintf (f
, " self size: %i\n", ss
->self_size
);
1096 fprintf (f
, " global size: %i\n", ss
->size
);
1097 fprintf (f
, " min size: %i\n", s
->min_size
);
1098 fprintf (f
, " self stack: %i\n",
1099 (int) ss
->estimated_self_stack_size
);
1100 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1102 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1104 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1105 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1107 fprintf (f
, " size:%f, time:%f",
1108 (double) e
->size
/ ipa_fn_summary::size_scale
,
1109 e
->time
.to_double ());
1110 if (e
->exec_predicate
!= true)
1112 fprintf (f
, ", executed if:");
1113 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1115 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1117 fprintf (f
, ", nonconst if:");
1118 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1122 ipa_freqcounting_predicate
*fcp
;
1123 bool first_fcp
= true;
1124 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1128 fprintf (f
, " loop iterations:");
1131 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1132 fcp
->predicate
->dump (f
, s
->conds
);
1135 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1139 fprintf (f
, " loop strides:");
1142 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1143 fcp
->predicate
->dump (f
, s
->conds
);
1145 fprintf (f
, " calls:\n");
1146 dump_ipa_call_summary (f
, 4, node
, s
);
1150 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
1155 ipa_debug_fn_summary (struct cgraph_node
*node
)
1157 ipa_dump_fn_summary (stderr
, node
);
1161 ipa_dump_fn_summaries (FILE *f
)
1163 struct cgraph_node
*node
;
1165 FOR_EACH_DEFINED_FUNCTION (node
)
1166 if (!node
->inlined_to
)
1167 ipa_dump_fn_summary (f
, node
);
1170 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1171 boolean variable pointed to by DATA. */
/* NOTE(review): the extraction dropped intermediate lines here -- the
   `void *data' parameter and the `*b = true; return true;' body lines
   (original lines ~1175-1180) are not visible.  Only the cast of DATA
   to bool* is shown; kept verbatim.  */
1174 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1177 bool *b
= (bool *) data
;
1182 /* If OP refers to value of function parameter, return the corresponding
1183 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1184 PARM_DECL) will be stored to *SIZE_P in that case too. */
/* NOTE(review): lossy extraction -- opening/closing braces and several
   statements (e.g. the `if (walked < 0)' guard around zeroing
   aa_walk_budget, original lines ~1209-1216) are missing; text verbatim.
   Visible behavior: recognizes (1) an SSA default-def of a PARM_DECL and
   (2) a direct PARM_DECL reference proven unmodified by walking aliased
   vdefs with mark_modified under fbi->aa_walk_budget.  */
1187 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1190 /* SSA_NAME referring to parm default def? */
1191 if (TREE_CODE (op
) == SSA_NAME
1192 && SSA_NAME_IS_DEFAULT_DEF (op
)
1193 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1196 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1197 return SSA_NAME_VAR (op
);
1199 /* Non-SSA parm reference? */
1200 if (TREE_CODE (op
) == PARM_DECL
)
1202 bool modified
= false;
1205 ao_ref_init (&refd
, op
);
1206 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1207 mark_modified
, &modified
, NULL
, NULL
,
1208 fbi
->aa_walk_budget
+ 1);
/* Presumably reached when the walk ran out of budget -- disable further
   AA walks for this function body.  TODO confirm against full source.  */
1211 fbi
->aa_walk_budget
= 0;
1217 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1224 /* If OP refers to value of function parameter, return the corresponding
1225 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1226 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1227 stored to *SIZE_P in that case too. */
/* NOTE(review): lossy extraction; braces and the early `if (res) return
   res;' style lines are missing.  Visible behavior: delegate to
   unmodified_parm_1, then recurse through single-rhs SSA assignment
   chains.  The recursive call's final argument (size_p, original line
   ~1242) is cut off here; text verbatim.  */
1230 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1233 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1237 if (TREE_CODE (op
) == SSA_NAME
1238 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1239 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1240 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1241 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1246 /* If OP refers to a value of a function parameter or value loaded from an
1247 aggregate passed to a parameter (either by value or reference), return TRUE
1248 and store the number of the parameter to *INDEX_P, the access size into
1249 *SIZE_P, and information whether and how it has been loaded from an
1250 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1251 statement in which OP is used or loaded. */
/* NOTE(review): lossy extraction; braces, early returns and the final
   argument of the recursive call (original line ~1281) are missing;
   text verbatim.  Visible behavior: try the scalar-parameter case via
   unmodified_parm_1/ipa_get_param_decl_index, otherwise chase a
   single-rhs SSA def chain and finally classify an aggregate load via
   ipa_load_from_parm_agg, filling *AGGPOS.  */
1254 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1255 gimple
*stmt
, tree op
, int *index_p
,
1257 struct agg_position_info
*aggpos
)
1259 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1261 gcc_checking_assert (aggpos
);
1264 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1267 aggpos
->agg_contents
= false;
1268 aggpos
->by_ref
= false;
1272 if (TREE_CODE (op
) == SSA_NAME
)
1274 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1275 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1277 stmt
= SSA_NAME_DEF_STMT (op
);
1278 op
= gimple_assign_rhs1 (stmt
);
1279 if (!REFERENCE_CLASS_P (op
))
1280 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1284 aggpos
->agg_contents
= true;
1285 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1286 stmt
, op
, index_p
, &aggpos
->offset
,
1287 size_p
, &aggpos
->by_ref
)
;
1290 /* See if statement might disappear after inlining.
1291 0 - means not eliminated
1292 1 - half of statements goes away
1293 2 - for sure it is eliminated.
1294 We are not terribly sophisticated, basically looking for simple abstraction
1295 penalty wrappers. */
/* NOTE(review): lossy extraction -- braces, `return 0/1/2;' lines and
   parts of the big TODO comment (original lines ~1375-1399) are missing;
   text kept verbatim.  Visible heuristic: for two-operand assigns that
   are casts/ADDR_EXPR/single-rhs copies, classify reads of parameters
   and writes to parameters/RESULT_DECL as (partially) free after
   inlining.  */
1298 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1300 enum gimple_code code
= gimple_code (stmt
);
1301 enum tree_code rhs_code
;
1311 if (gimple_num_ops (stmt
) != 2)
1314 rhs_code
= gimple_assign_rhs_code (stmt
);
1316 /* Casts of parameters, loads from parameters passed by reference
1317 and stores to return value or parameters are often free after
1318 inlining due to SRA and further combining.
1319 Assume that half of statements goes away. */
1320 if (CONVERT_EXPR_CODE_P (rhs_code
)
1321 || rhs_code
== VIEW_CONVERT_EXPR
1322 || rhs_code
== ADDR_EXPR
1323 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1325 tree rhs
= gimple_assign_rhs1 (stmt
);
1326 tree lhs
= gimple_assign_lhs (stmt
);
1327 tree inner_rhs
= get_base_address (rhs
);
1328 tree inner_lhs
= get_base_address (lhs
);
1329 bool rhs_free
= false;
1330 bool lhs_free
= false;
1337 /* Reads of parameter are expected to be free. */
1338 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1340 /* Match expressions of form &this->field. Those will most likely
1341 combine with something upstream after inlining. */
1342 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1344 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1345 if (TREE_CODE (op
) == PARM_DECL
)
1347 else if (TREE_CODE (op
) == MEM_REF
1348 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1353 /* When parameter is not SSA register because its address is taken
1354 and it is just copied into one, the statement will be completely
1355 free after inlining (we will copy propagate backward). */
1356 if (rhs_free
&& is_gimple_reg (lhs
))
1359 /* Reads of parameters passed by reference
1360 expected to be free (i.e. optimized out after inlining). */
1361 if (TREE_CODE (inner_rhs
) == MEM_REF
1362 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1365 /* Copying parameter passed by reference into gimple register is
1366 probably also going to copy propagate, but we can't be quite
1368 if (rhs_free
&& is_gimple_reg (lhs
))
1371 /* Writes to parameters, parameters passed by value and return value
1372 (either directly or passed via invisible reference) are free.
1374 TODO: We ought to handle testcase like
1375 struct a {int a,b;};
1383 This translate into:
1398 For that we either need to copy ipa-split logic detecting writes
1400 if (TREE_CODE (inner_lhs
) == PARM_DECL
1401 || TREE_CODE (inner_lhs
) == RESULT_DECL
1402 || (TREE_CODE (inner_lhs
) == MEM_REF
1403 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1405 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1406 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1407 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1409 0))) == RESULT_DECL
))))
1412 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1414 if (lhs_free
&& rhs_free
)
1423 /* Analyze EXPR if it represents a series of simple operations performed on
1424 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1425 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1426 Type of the parameter or load from an aggregate via the parameter is
1427 stored in *TYPE_P. Operations on the parameter are recorded to
1428 PARAM_OPS_P if it is not NULL. */
/* NOTE(review): lossy extraction -- braces, `return' statements, the
   `rhs_count' initialization per rhs-class and loop framing are missing;
   text verbatim.  Visible behavior: walk backwards from EXPR through
   single/unary/binary/ternary assigns, bounded by
   param_ipa_max_param_expr_ops, recording each step as an expr_eval_op
   (at most one non-constant operand per step) until a parameter or
   parameter-aggregate load is found.  */
1431 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1432 gimple
*stmt
, tree expr
,
1433 int *index_p
, tree
*type_p
,
1434 struct agg_position_info
*aggpos
,
1435 expr_eval_ops
*param_ops_p
= NULL
)
1437 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1441 *param_ops_p
= NULL
;
1445 expr_eval_op eval_op
;
1447 unsigned cst_count
= 0;
1449 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1452 tree type
= TREE_TYPE (expr
);
1454 if (aggpos
->agg_contents
)
1456 /* Stop if containing bit-field. */
1457 if (TREE_CODE (expr
) == BIT_FIELD_REF
1458 || contains_bitfld_component_ref_p (expr
))
1466 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1469 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1472 switch (gimple_assign_rhs_class (stmt
))
1474 case GIMPLE_SINGLE_RHS
:
1475 expr
= gimple_assign_rhs1 (stmt
);
1478 case GIMPLE_UNARY_RHS
:
1482 case GIMPLE_BINARY_RHS
:
1486 case GIMPLE_TERNARY_RHS
:
1494 /* Stop if expression is too complex. */
1495 if (op_count
++ == op_limit
)
1500 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1501 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1502 eval_op
.val
[0] = NULL_TREE
;
1503 eval_op
.val
[1] = NULL_TREE
;
1507 for (unsigned i
= 0; i
< rhs_count
; i
++)
1509 tree op
= gimple_op (stmt
, i
+ 1);
1511 gcc_assert (op
&& !TYPE_P (op
));
1512 if (is_gimple_ip_invariant (op
))
1514 if (++cst_count
== rhs_count
)
1517 eval_op
.val
[cst_count
- 1] = op
;
1521 /* Found a non-constant operand, and record its index in rhs
1528 /* Found more than one non-constant operands. */
1534 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1537 /* Failed to decompose, free resource and return. */
1540 vec_free (*param_ops_p
);
1545 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
/* NOTE(review): lossy extraction; the duplicate-check body (comparing
   `ip' against PARM and returning, original lines ~1555-1556) is
   missing.  Visible behavior: scan builtin_constant_p_parms to avoid
   duplicates, then push PARM.  */
1548 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1552 /* Avoid duplicates. */
1553 for (unsigned int i
= 0;
1554 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1557 summary
->builtin_constant_p_parms
.safe_push (parm
);
1560 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1561 predicates to the CFG edges. */
/* NOTE(review): lossy extraction -- braces, `return' lines and several
   declarations are missing, and `&param_type' appears mojibake'd as
   `¶m_type' (HTML-entity corruption of the original text); all kept
   verbatim per the source-preservation rule.  Visible behavior: for a
   GIMPLE_COND on a decomposable parameter expression, attach
   add_condition predicates to the true/false successor edges (using the
   inverted comparison for the false edge); separately recognize the
   `if (__builtin_constant_p (param) != 0)' idiom and attach an
   is_not_constant predicate to the false edge.  */
1564 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1565 class ipa_fn_summary
*summary
,
1566 class ipa_node_params
*params_summary
,
1572 struct agg_position_info aggpos
;
1573 enum tree_code code
, inverted_code
;
1578 expr_eval_ops param_ops
;
1580 last
= last_stmt (bb
);
1581 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1583 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1585 op
= gimple_cond_lhs (last
);
1587 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1590 code
= gimple_cond_code (last
);
1591 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1593 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1595 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1596 ? code
: inverted_code
);
1597 /* invert_tree_comparison will return ERROR_MARK on FP
1598 comparisons that are not EQ/NE instead of returning proper
1599 unordered one. Be sure it is not confused with NON_CONSTANT.
1601 And if the edge's target is the final block of diamond CFG graph
1602 of this conditional statement, we do not need to compute
1603 predicate for the edge because the final block's predicate must
1604 be at least as that of the first block of the statement. */
1605 if (this_code
!= ERROR_MARK
1606 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1609 = add_condition (summary
, params_summary
, index
,
1610 param_type
, &aggpos
,
1611 this_code
, gimple_cond_rhs (last
), param_ops
);
1612 e
->aux
= edge_predicate_pool
.allocate ();
1613 *(predicate
*) e
->aux
= p
;
1616 vec_free (param_ops
);
1619 if (TREE_CODE (op
) != SSA_NAME
)
1622 if (builtin_constant_p (op))
1626 Here we can predicate nonconstant_code. We can't
1627 really handle constant_code since we have no predicate
1628 for this and also the constant code is not known to be
1629 optimized away when inliner doesn't see operand is constant.
1630 Other optimizers might think otherwise. */
1631 if (gimple_cond_code (last
) != NE_EXPR
1632 || !integer_zerop (gimple_cond_rhs (last
)))
1634 set_stmt
= SSA_NAME_DEF_STMT (op
);
1635 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1636 || gimple_call_num_args (set_stmt
) != 1)
1638 op2
= gimple_call_arg (set_stmt
, 0);
1639 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1642 add_builtin_constant_p_parm (summary
, index
);
1643 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1645 predicate p
= add_condition (summary
, params_summary
, index
,
1646 param_type
, &aggpos
,
1647 predicate::is_not_constant
, NULL_TREE
);
1648 e
->aux
= edge_predicate_pool
.allocate ();
1649 *(predicate
*) e
->aux
= p
;
1654 /* If BB ends by a switch we can turn into predicates, attach corresponding
1655 predicates to the CFG edges. */
/* NOTE(review): lossy extraction -- braces, returns and several
   declarations are missing; `&param_type' is mojibake'd as `¶m_type';
   all kept verbatim.  Visible behavior: decompose the switch index into
   a parameter expression, attach per-case predicates (EQ for singleton
   cases, GE&&LE for ranges) to case edges, merge/collect case ranges
   bounded by param_ipa_max_switch_predicate_bounds, and finally build
   the default edge's predicate as the negation of all collected ranges,
   clipped by get_range_info value-range data on the index.  */
1658 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1659 class ipa_fn_summary
*summary
,
1660 class ipa_node_params
*params_summary
,
1666 struct agg_position_info aggpos
;
1672 expr_eval_ops param_ops
;
1674 lastg
= last_stmt (bb
);
1675 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1677 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1678 op
= gimple_switch_index (last
);
1679 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1683 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1684 tree type
= TREE_TYPE (op
);
1685 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1686 param_ipa_max_switch_predicate_bounds
);
1687 int bound_count
= 0;
1688 wide_int vr_wmin
, vr_wmax
;
1689 value_range_kind vr_type
= get_range_info (op
, &vr_wmin
, &vr_wmax
);
1691 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1693 e
->aux
= edge_predicate_pool
.allocate ();
1694 *(predicate
*) e
->aux
= false;
1697 e
= gimple_switch_edge (cfun
, last
, 0);
1698 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1699 default case if its target basic block is in convergence point of all
1700 switch cases, which can be determined by checking whether it
1701 post-dominates the switch statement. */
1702 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1703 bound_count
= INT_MAX
;
1705 n
= gimple_switch_num_labels (last
);
1706 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1708 tree cl
= gimple_switch_label (last
, case_idx
);
1709 tree min
= CASE_LOW (cl
);
1710 tree max
= CASE_HIGH (cl
);
1713 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1715 /* The case value might not have same type as switch expression,
1716 extend the value based on the expression type. */
1717 if (TREE_TYPE (min
) != type
)
1718 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1722 else if (TREE_TYPE (max
) != type
)
1723 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1725 /* The case's target basic block is in convergence point of all switch
1726 cases, its predicate should be at least as that of the switch
1728 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1730 else if (min
== max
)
1731 p
= add_condition (summary
, params_summary
, index
, param_type
,
1732 &aggpos
, EQ_EXPR
, min
, param_ops
);
1736 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1737 &aggpos
, GE_EXPR
, min
, param_ops
);
1738 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1739 &aggpos
, LE_EXPR
, max
, param_ops
);
1742 *(class predicate
*) e
->aux
1743 = p
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1745 /* If there are too many disjoint case ranges, predicate for default
1746 case might become too complicated. So add a limit here. */
1747 if (bound_count
> bound_limit
)
1750 bool new_range
= true;
1752 if (!ranges
.is_empty ())
1754 wide_int curr_wmin
= wi::to_wide (min
);
1755 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1757 /* Merge case ranges if they are continuous. */
1758 if (curr_wmin
== last_wmax
+ 1)
1760 else if (vr_type
== VR_ANTI_RANGE
)
1762 /* If two disjoint case ranges can be connected by anti-range
1763 of switch index, combine them to one range. */
1764 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1765 vr_type
= VR_UNDEFINED
;
1766 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1771 /* Create/extend a case range. And we count endpoints of range set,
1772 this number nearly equals to number of conditions that we will create
1773 for predicate of default case. */
1776 bound_count
+= (min
== max
) ? 1 : 2;
1777 ranges
.safe_push (std::make_pair (min
, max
));
1781 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1782 ranges
.last ().second
= max
;
1786 e
= gimple_switch_edge (cfun
, last
, 0);
1787 if (bound_count
> bound_limit
)
1789 *(class predicate
*) e
->aux
= true;
1790 vec_free (param_ops
);
1794 predicate p_seg
= true;
1795 predicate p_all
= false;
1797 if (vr_type
!= VR_RANGE
)
1799 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1800 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1803 /* Construct predicate to represent default range set that is negation of
1804 all case ranges. Case range is classified as containing single/non-single
1805 values. Suppose a piece of case ranges in the following.
1807 [D1...D2] [S1] ... [Sn] [D3...D4]
1809 To represent default case's range sets between two non-single value
1810 case ranges (From D2 to D3), we construct predicate as:
1812 D2 < x < D3 && x != S1 && ... && x != Sn
1814 for (size_t i
= 0; i
< ranges
.length (); i
++)
1816 tree min
= ranges
[i
].first
;
1817 tree max
= ranges
[i
].second
;
1820 p_seg
&= add_condition (summary
, params_summary
, index
,
1821 param_type
, &aggpos
, NE_EXPR
,
1825 /* Do not create sub-predicate for range that is beyond low bound
1827 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1829 p_seg
&= add_condition (summary
, params_summary
, index
,
1830 param_type
, &aggpos
,
1831 LT_EXPR
, min
, param_ops
);
1832 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1835 /* Do not create sub-predicate for range that is beyond up bound
1837 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1843 p_seg
= add_condition (summary
, params_summary
, index
,
1844 param_type
, &aggpos
, GT_EXPR
,
1849 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1850 *(class predicate
*) e
->aux
1851 = p_all
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1853 vec_free (param_ops
);
1857 /* For each BB in NODE attach to its AUX pointer predicate under
1858 which it is executable. */
/* NOTE(review): lossy extraction -- the `done'/`changed' iteration
   framing, braces and some guards are missing; text verbatim.  Visible
   behavior: seed edge predicates from the cond/switch helpers, mark the
   entry block always-true, then iterate a forward dataflow OR-ing
   predecessor predicates (ANDed with edge predicates) into each block,
   and propagate each block's predicate to its immediate post-dominator
   to keep convergence-point predicates simple.  */
1861 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1862 struct cgraph_node
*node
,
1863 class ipa_fn_summary
*summary
,
1864 class ipa_node_params
*params_summary
)
1866 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1870 FOR_EACH_BB_FN (bb
, my_function
)
1872 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1873 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1876 /* Entry block is always executable. */
1877 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1878 = edge_predicate_pool
.allocate ();
1879 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1881 /* A simple dataflow propagation of predicates forward in the CFG.
1882 TODO: work in reverse postorder. */
1886 FOR_EACH_BB_FN (bb
, my_function
)
1888 predicate p
= false;
1891 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1895 predicate this_bb_predicate
1896 = *(predicate
*) e
->src
->aux
;
1898 this_bb_predicate
&= (*(class predicate
*) e
->aux
);
1899 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1906 basic_block pdom_bb
;
1911 bb
->aux
= edge_predicate_pool
.allocate ();
1912 *((predicate
*) bb
->aux
) = p
;
1914 else if (p
!= *(predicate
*) bb
->aux
)
1916 /* This OR operation is needed to ensure monotonous data flow
1917 in the case we hit the limit on number of clauses and the
1918 and/or operations above give approximate answers. */
1919 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1920 if (p
!= *(predicate
*) bb
->aux
)
1923 *((predicate
*) bb
->aux
) = p
;
1927 /* For switch/if statement, we can OR-combine predicates of all
1928 its cases/branches to get predicate for basic block in their
1929 convergence point, but sometimes this will generate very
1930 complicated predicate. Actually, we can get simplified
1931 predicate in another way by using the fact that predicate
1932 for a basic block must also hold true for its post dominators.
1933 To be specific, basic block in convergence point of
1934 conditional statement should include predicate of the
1936 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1937 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1939 else if (!pdom_bb
->aux
)
1942 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1943 *((predicate
*) pdom_bb
->aux
) = p
;
1945 else if (p
!= *(predicate
*) pdom_bb
->aux
)
1947 p
= p
.or_with (summary
->conds
, *(predicate
*)pdom_bb
->aux
);
1948 if (p
!= *(predicate
*) pdom_bb
->aux
)
1951 *((predicate
*) pdom_bb
->aux
) = p
;
1960 /* Return predicate specifying when the STMT might have result that is not
1961 a compile time constant. */
/* NOTE(review): lossy extraction -- braces, `return true/false'
   fall-through lines and local declarations (p1/p2, parm, index) are
   missing; text verbatim.  Visible behavior: strip unary wrappers,
   map unmodified parameters to a `changed' condition, look up SSA names
   in NONCONSTANT_NAMES, and recurse over binary/comparison/COND_EXPR
   operands OR-combining the sub-predicates.  */
1964 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1965 class ipa_fn_summary
*summary
,
1966 class ipa_node_params
*params_summary
,
1968 vec
<predicate
> nonconstant_names
)
1973 while (UNARY_CLASS_P (expr
))
1974 expr
= TREE_OPERAND (expr
, 0);
1976 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1977 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1978 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
1979 predicate::changed
, NULL_TREE
);
1980 if (is_gimple_min_invariant (expr
))
1982 if (TREE_CODE (expr
) == SSA_NAME
)
1983 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1984 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1987 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1989 TREE_OPERAND (expr
, 0),
1995 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1997 TREE_OPERAND (expr
, 1),
1999 return p1
.or_with (summary
->conds
, p2
);
2001 else if (TREE_CODE (expr
) == COND_EXPR
)
2004 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2006 TREE_OPERAND (expr
, 0),
2012 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2014 TREE_OPERAND (expr
, 1),
2018 p1
= p1
.or_with (summary
->conds
, p2
);
2019 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2021 TREE_OPERAND (expr
, 2),
2023 return p2
.or_with (summary
->conds
, p1
);
2025 else if (TREE_CODE (expr
) == CALL_EXPR
)
2036 /* Return predicate specifying when the STMT might have result that is not
2037 a compile time constant. */
/* NOTE(review): lossy extraction -- braces, early `return true;' lines
   and some declarations are missing; `&param_type' is mojibake'd as
   `¶m_type'; all kept verbatim.  Visible behavior: only assigns,
   conds, switches and ECF_CONST calls can be optimized away; stores
   never; loads must decompose to a parameter aggregate access; every
   SSA use must either map to a parameter (-> `changed' condition) or
   have a known predicate in NONCONSTANT_NAMES; the OR of those is also
   recorded for the statement's SSA lhs.  */
2040 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2041 class ipa_fn_summary
*summary
,
2042 class ipa_node_params
*params_summary
,
2044 vec
<predicate
> nonconstant_names
)
2049 tree param_type
= NULL_TREE
;
2050 predicate op_non_const
;
2053 struct agg_position_info aggpos
;
2055 /* What statements might be optimized away
2056 when their arguments are constant. */
2057 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2058 && gimple_code (stmt
) != GIMPLE_COND
2059 && gimple_code (stmt
) != GIMPLE_SWITCH
2060 && (gimple_code (stmt
) != GIMPLE_CALL
2061 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2064 /* Stores will stay anyway. */
2065 if (gimple_store_p (stmt
))
2068 is_load
= gimple_assign_load_p (stmt
);
2070 /* Loads can be optimized when the value is known. */
2073 tree op
= gimple_assign_rhs1 (stmt
);
2074 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2081 /* See if we understand all operands before we start
2082 adding conditionals. */
2083 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2085 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2086 /* For arguments we can build a condition. */
2087 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2089 if (TREE_CODE (use
) != SSA_NAME
)
2091 /* If we know when operand is constant,
2092 we still can say something useful. */
2093 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2100 add_condition (summary
, params_summary
,
2101 base_index
, param_type
, &aggpos
,
2102 predicate::changed
, NULL_TREE
);
2104 op_non_const
= false;
2105 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2107 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2110 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2112 if (index
!= base_index
)
2113 p
= add_condition (summary
, params_summary
, index
,
2114 TREE_TYPE (parm
), NULL
,
2115 predicate::changed
, NULL_TREE
);
2120 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2121 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2123 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2124 && gimple_op (stmt
, 0)
2125 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2126 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2128 return op_non_const
;
/* NOTE(review): lossy extraction -- the record_modified_bb_info members
   (presumably a bitmap of BBs, the statement and the operand; used as
   info->bb_set / info->stmt / info->op below) and get_minimal_bb's
   return statements are missing; text verbatim.  */
2131 struct record_modified_bb_info
2138 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2139 probability how often it changes between USE_BB.
2140 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2141 is in different loop nest, we can do better.
2142 This is all just estimate. In theory we look for minimal cut separating
2143 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2147 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2149 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2150 if (l
&& l
->header
->count
< init_bb
->count
)
2155 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2156 set except for info->stmt. */
/* NOTE(review): lossy extraction -- braces, `return false;' lines and
   the `if (dump_file)' guard around the dump output are missing; text
   verbatim.  Visible behavior: skip the statement being analyzed and
   clobbers; record the (loop-minimized via get_minimal_bb) defining
   block of VDEF in info->bb_set; optionally dump what changed.  */
2159 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2161 struct record_modified_bb_info
*info
=
2162 (struct record_modified_bb_info
*) data
;
2163 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2165 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2167 bitmap_set_bit (info
->bb_set
,
2168 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2169 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2171 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2172 gimple_bb (info
->stmt
))->index
);
2175 fprintf (dump_file
, " Param ");
2176 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2177 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2178 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2180 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2181 gimple_bb (info
->stmt
))->index
);
2182 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2187 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2188 will change since last invocation of STMT.
2190 Value 0 is reserved for compile time invariants.
2191 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2192 ought to be REG_BR_PROB_BASE / estimated_iters. */
/* NOTE(review): lossy extraction -- braces, `return 0;' for invariants,
   the ao_ref/walked declarations and some dump guards are missing; text
   verbatim.  Visible behavior: for an SSA base, scale probability by
   how much more often the use executes than the (loop-minimized)
   definition; for memory bases, walk aliased vdefs with record_modified
   under fbi->aa_walk_budget, take the most frequent modifying BB, and
   scale likewise; bail out to REG_BR_PROB_BASE on budget exhaustion or
   same-BB modification.  */
2195 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2197 tree op
= gimple_call_arg (stmt
, i
);
2198 basic_block bb
= gimple_bb (stmt
);
2200 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2201 op
= TREE_OPERAND (op
, 0);
2203 tree base
= get_base_address (op
);
2205 /* Global invariants never change. */
2206 if (is_gimple_min_invariant (base
))
2209 /* We would have to do non-trivial analysis to really work out what
2210 is the probability of value to change (i.e. when init statement
2211 is in a sibling loop of the call).
2213 We do an conservative estimate: when call is executed N times more often
2214 than the statement defining value, we take the frequency 1/N. */
2215 if (TREE_CODE (base
) == SSA_NAME
)
2217 profile_count init_count
;
2219 if (!bb
->count
.nonzero_p ())
2220 return REG_BR_PROB_BASE
;
2222 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2223 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2225 init_count
= get_minimal_bb
2226 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2227 gimple_bb (stmt
))->count
;
2229 if (init_count
< bb
->count
)
2230 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2231 * REG_BR_PROB_BASE
).to_int (), 1);
2232 return REG_BR_PROB_BASE
;
2237 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2238 struct record_modified_bb_info info
;
2239 tree init
= ctor_for_folding (base
);
2241 if (init
!= error_mark_node
)
2243 if (!bb
->count
.nonzero_p ())
2244 return REG_BR_PROB_BASE
;
2247 fprintf (dump_file
, " Analyzing param change probability of ");
2248 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2249 fprintf (dump_file
, "\n");
2251 ao_ref_init (&refd
, op
);
2254 info
.bb_set
= BITMAP_ALLOC (NULL
);
2256 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2257 NULL
, NULL
, fbi
->aa_walk_budget
);
2258 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2263 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2265 fprintf (dump_file
, " Set in same BB as used.\n");
2267 BITMAP_FREE (info
.bb_set
);
2268 return REG_BR_PROB_BASE
;
2273 /* Lookup the most frequent update of the value and believe that
2274 it dominates all the other; precise analysis here is difficult. */
2275 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2276 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2279 fprintf (dump_file
, " Set with count ");
2280 max
.dump (dump_file
);
2281 fprintf (dump_file
, " and used with count ");
2282 bb
->count
.dump (dump_file
);
2283 fprintf (dump_file
, " freq %f\n",
2284 max
.to_sreal_scale (bb
->count
).to_double ());
2287 BITMAP_FREE (info
.bb_set
);
2288 if (max
< bb
->count
)
2289 return MAX ((max
.to_sreal_scale (bb
->count
)
2290 * REG_BR_PROB_BASE
).to_int (), 1);
2291 return REG_BR_PROB_BASE
;
2295 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2296 sub-graph and if the predicate the condition depends on is known. If so,
2297 return true and store the pointer the predicate in *P. */
/* NOTE(review): lossy extraction -- braces, `return false;' lines, the
   null check on stmt at original line ~2342 and the trailing arguments
   of the final call are missing; text verbatim.  Visible shape
   recognition: every predecessor of BB is either the condition block
   itself or a single-pred/single-succ forwarder from it (half diamond);
   the condition's LHS predicate comes from
   will_be_nonconstant_expr_predicate.  */
2300 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2301 ipa_fn_summary
*summary
,
2302 class ipa_node_params
*params_summary
,
2305 vec
<predicate
> nonconstant_names
)
2309 basic_block first_bb
= NULL
;
2312 if (single_pred_p (bb
))
2318 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2320 if (single_succ_p (e
->src
))
2322 if (!single_pred_p (e
->src
))
2325 first_bb
= single_pred (e
->src
);
2326 else if (single_pred (e
->src
) != first_bb
)
2333 else if (e
->src
!= first_bb
)
2341 stmt
= last_stmt (first_bb
);
2343 || gimple_code (stmt
) != GIMPLE_COND
2344 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2347 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2348 gimple_cond_lhs (stmt
),
2356 /* Given a PHI statement in a function described by inline properties SUMMARY
2357 and *P being the predicate describing whether the selected PHI argument is
2358 known, store a predicate for the result of the PHI statement into
2359 NONCONSTANT_NAMES, if possible. */
/* NOTE(review): lossy extraction -- braces and the early `return' when
   *p becomes true (original lines ~2376-2378) are missing; text
   verbatim.  Visible behavior: OR into *P the nonconstant predicate of
   every non-invariant PHI argument, then record the result for the PHI
   result's SSA version.  */
2362 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2364 vec
<predicate
> nonconstant_names
)
2368 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2370 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2371 if (!is_gimple_min_invariant (arg
))
2373 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2374 *p
= p
->or_with (summary
->conds
,
2375 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2381 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2383 fprintf (dump_file
, "\t\tphi predicate: ");
2384 p
->dump (dump_file
, summary
->conds
);
2386 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2389 /* For a typical usage of __builtin_expect (a<b, 1), we
2390 may introduce an extra relation stmt:
2391 With the builtin, we have
2394 t3 = __builtin_expect (t2, 1);
2397 Without the builtin, we have
2400 This affects the size/time estimation and may have
2401 an impact on the earlier inlining.
2402 Here find this pattern and fix it up later. */
/* NOTE(review): lossy extraction -- braces, the `match' initialization,
   the switch's case labels (presumably the cast/compare codes that keep
   the walk going, original lines ~2433-2450) and the `return var;' /
   `return NULL;' tail are missing; text verbatim.  Visible behavior:
   scan BB for __builtin_expect-style calls whose argument chains back
   through simple assigns and whose single use is a GIMPLE_COND.  */
2405 find_foldable_builtin_expect (basic_block bb
)
2407 gimple_stmt_iterator bsi
;
2409 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2411 gimple
*stmt
= gsi_stmt (bsi
);
2412 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2413 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2414 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2416 tree var
= gimple_call_lhs (stmt
);
2417 tree arg
= gimple_call_arg (stmt
, 0);
2418 use_operand_p use_p
;
2425 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2427 while (TREE_CODE (arg
) == SSA_NAME
)
2429 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2430 if (!is_gimple_assign (stmt_tmp
))
2432 switch (gimple_assign_rhs_code (stmt_tmp
))
2451 arg
= gimple_assign_rhs1 (stmt_tmp
);
2454 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2455 && gimple_code (use_stmt
) == GIMPLE_COND
)
2462 /* Return true when the basic blocks contains only clobbers followed by RESX.
2463 Such BBs are kept around to make removal of dead stores possible with
2464 presence of EH and will be optimized out by optimize_clobbers later in the
2467 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2468 that can be clobber only, too.. When it is false, the RESX is not necessary
2469 on the end of basic block. */
/* NOTE(review): lossy extraction -- braces, `return false;'/`return
   true;' lines and `continue;' statements are missing; text verbatim.
   Visible behavior: when NEED_EH, the last statement must be a
   GIMPLE_RESX; otherwise BB must be single-succ; then every remaining
   statement must be a debug stmt, clobber or label, and all
   predecessors must reach BB via EH edges or be clobber-only
   themselves (recursion with need_eh=false).  */
2472 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2474 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2480 if (gsi_end_p (gsi
))
2482 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2486 else if (!single_succ_p (bb
))
2489 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2491 gimple
*stmt
= gsi_stmt (gsi
);
2492 if (is_gimple_debug (stmt
))
2494 if (gimple_clobber_p (stmt
))
2496 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2501 /* See if all predecessors are either throws or clobber only BBs. */
2502 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2503 if (!(e
->flags
& EDGE_EH
)
2504 && !clobber_only_eh_bb_p (e
->src
, false))
2510 /* Return true if STMT compute a floating point expression that may be affected
2511 by -ffast-math and similar flags. */
2514 fp_expression_p (gimple
*stmt
)
2519 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2520 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2525 /* Return true if T references memory location that is local
2526 for the function (that means, dead after return) or read-only. */
2529 refs_local_or_readonly_memory_p (tree t
)
2531 /* Non-escaping memory is fine. */
2532 t
= get_base_address (t
);
2533 if ((TREE_CODE (t
) == MEM_REF
2534 || TREE_CODE (t
) == TARGET_MEM_REF
))
2535 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2537 /* Automatic variables are fine. */
2539 && auto_var_in_fn_p (t
, current_function_decl
))
2542 /* Read-only variables are fine. */
2543 if (DECL_P (t
) && TREE_READONLY (t
))
2549 /* Return true if T is a pointer pointing to memory location that is local
2550 for the function (that means, dead after return) or read-only. */
2553 points_to_local_or_readonly_memory_p (tree t
)
2555 /* See if memory location is clearly invalid. */
2556 if (integer_zerop (t
))
2557 return flag_delete_null_pointer_checks
;
2558 if (TREE_CODE (t
) == SSA_NAME
)
2559 return !ptr_deref_may_alias_global_p (t
);
2560 if (TREE_CODE (t
) == ADDR_EXPR
)
2561 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2566 /* Analyze function body for NODE.
2567 EARLY indicates run from early optimization pipeline. */
2570 analyze_function_body (struct cgraph_node
*node
, bool early
)
2572 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2573 /* Estimate static overhead for function prologue/epilogue and alignment. */
2574 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2575 /* Benefits are scaled by probability of elimination that is in range
2578 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2580 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2581 class ipa_node_params
*params_summary
= early
? NULL
: IPA_NODE_REF (node
);
2582 predicate bb_predicate
;
2583 struct ipa_func_body_info fbi
;
2584 vec
<predicate
> nonconstant_names
= vNULL
;
2587 gimple
*fix_builtin_expect_stmt
;
2589 gcc_assert (my_function
&& my_function
->cfg
);
2590 gcc_assert (cfun
== my_function
);
2592 memset(&fbi
, 0, sizeof(fbi
));
2593 vec_free (info
->conds
);
2595 info
->size_time_table
.release ();
2596 info
->call_size_time_table
.release ();
2598 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2599 so we can produce proper inline hints.
2601 When optimizing and analyzing for early inliner, initialize node params
2602 so we can produce correct BB predicates. */
2604 if (opt_for_fn (node
->decl
, optimize
))
2606 calculate_dominance_info (CDI_DOMINATORS
);
2607 calculate_dominance_info (CDI_POST_DOMINATORS
);
2609 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2612 ipa_check_create_node_params ();
2613 ipa_initialize_node_params (node
);
2616 if (ipa_node_params_sum
)
2619 fbi
.info
= IPA_NODE_REF (node
);
2620 fbi
.bb_infos
= vNULL
;
2621 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2622 fbi
.param_count
= count_formal_params (node
->decl
);
2623 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2625 nonconstant_names
.safe_grow_cleared
2626 (SSANAMES (my_function
)->length (), true);
2631 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2632 node
->dump_name ());
2634 /* When we run into maximal number of entries, we assign everything to the
2635 constant truth case. Be sure to have it in list. */
2636 bb_predicate
= true;
2637 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2639 bb_predicate
= predicate::not_inlined ();
2640 info
->account_size_time (opt_for_fn (node
->decl
,
2641 param_uninlined_function_insns
)
2642 * ipa_fn_summary::size_scale
,
2643 opt_for_fn (node
->decl
,
2644 param_uninlined_function_time
),
2649 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2650 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2651 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2652 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2653 for (n
= 0; n
< nblocks
; n
++)
2655 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2656 freq
= bb
->count
.to_sreal_scale (entry_count
);
2657 if (clobber_only_eh_bb_p (bb
))
2659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2660 fprintf (dump_file
, "\n Ignoring BB %i;"
2661 " it will be optimized away by cleanup_clobbers\n",
2666 /* TODO: Obviously predicates can be propagated down across CFG. */
2670 bb_predicate
= *(predicate
*) bb
->aux
;
2672 bb_predicate
= false;
2675 bb_predicate
= true;
2677 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2679 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2680 bb_predicate
.dump (dump_file
, info
->conds
);
2683 if (fbi
.info
&& nonconstant_names
.exists ())
2685 predicate phi_predicate
;
2686 bool first_phi
= true;
2688 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2692 && !phi_result_unknown_predicate (&fbi
, info
,
2699 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2701 fprintf (dump_file
, " ");
2702 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2704 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2709 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2711 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2712 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2714 gimple
*stmt
= gsi_stmt (bsi
);
2715 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2716 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2718 predicate will_be_nonconstant
;
2720 /* This relation stmt should be folded after we remove
2721 __builtin_expect call. Adjust the cost here. */
2722 if (stmt
== fix_builtin_expect_stmt
)
2728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2730 fprintf (dump_file
, " ");
2731 print_gimple_stmt (dump_file
, stmt
, 0);
2732 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2733 freq
.to_double (), this_size
,
2737 if (is_gimple_call (stmt
)
2738 && !gimple_call_internal_p (stmt
))
2740 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2741 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2743 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2744 resolved as constant. We however don't want to optimize
2745 out the cgraph edges. */
2746 if (nonconstant_names
.exists ()
2747 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2748 && gimple_call_lhs (stmt
)
2749 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2751 predicate false_p
= false;
2752 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2755 if (ipa_node_params_sum
)
2757 int count
= gimple_call_num_args (stmt
);
2761 es
->param
.safe_grow_cleared (count
, true);
2762 for (i
= 0; i
< count
; i
++)
2764 int prob
= param_change_prob (&fbi
, stmt
, i
);
2765 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2766 es
->param
[i
].change_prob
= prob
;
2767 es
->param
[i
].points_to_local_or_readonly_memory
2768 = points_to_local_or_readonly_memory_p
2769 (gimple_call_arg (stmt
, i
));
2773 es
->call_stmt_size
= this_size
;
2774 es
->call_stmt_time
= this_time
;
2775 es
->loop_depth
= bb_loop_depth (bb
);
2776 edge_set_predicate (edge
, &bb_predicate
);
2777 if (edge
->speculative
)
2779 cgraph_edge
*indirect
2780 = edge
->speculative_call_indirect_edge ();
2781 ipa_call_summary
*es2
2782 = ipa_call_summaries
->get_create (indirect
);
2783 ipa_call_summaries
->duplicate (edge
, indirect
,
2786 /* Edge is the first direct call.
2787 create and duplicate call summaries for multiple
2788 speculative call targets. */
2789 for (cgraph_edge
*direct
2790 = edge
->next_speculative_call_target ();
2792 direct
= direct
->next_speculative_call_target ())
2794 ipa_call_summary
*es3
2795 = ipa_call_summaries
->get_create (direct
);
2796 ipa_call_summaries
->duplicate (edge
, direct
,
2802 /* TODO: When conditional jump or switch is known to be constant, but
2803 we did not translate it into the predicates, we really can account
2804 just maximum of the possible paths. */
2807 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2808 stmt
, nonconstant_names
);
2810 will_be_nonconstant
= true;
2811 if (this_time
|| this_size
)
2813 sreal final_time
= (sreal
)this_time
* freq
;
2815 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2816 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2818 "\t\t50%% will be eliminated by inlining\n");
2819 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2820 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2822 class predicate p
= bb_predicate
& will_be_nonconstant
;
2824 /* We can ignore statement when we proved it is never going
2825 to happen, but we cannot do that for call statements
2826 because edges are accounted specially. */
2828 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2834 /* We account everything but the calls. Calls have their own
2835 size/time info attached to cgraph edges. This is necessary
2836 in order to make the cost disappear after inlining. */
2837 if (!is_gimple_call (stmt
))
2841 predicate ip
= bb_predicate
& predicate::not_inlined ();
2842 info
->account_size_time (this_size
* prob
,
2843 (final_time
* prob
) / 2, ip
,
2847 info
->account_size_time (this_size
* (2 - prob
),
2848 (final_time
* (2 - prob
) / 2),
2853 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2855 info
->fp_expressions
= true;
2857 fprintf (dump_file
, " fp_expression set\n");
2861 /* Account cost of address calculations in the statements. */
2862 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2864 for (tree op
= gimple_op (stmt
, i
);
2865 op
&& handled_component_p (op
);
2866 op
= TREE_OPERAND (op
, 0))
2867 if ((TREE_CODE (op
) == ARRAY_REF
2868 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2869 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2871 predicate p
= bb_predicate
;
2873 p
= p
& will_be_nonconstant_expr_predicate
2874 (&fbi
, info
, params_summary
,
2875 TREE_OPERAND (op
, 1),
2883 "\t\tAccounting address calculation.\n");
2884 info
->account_size_time (ipa_fn_summary::size_scale
,
2896 if (nonconstant_names
.exists () && !early
)
2898 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2900 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
2901 param_ipa_max_loop_predicates
);
2903 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2904 flow_loops_dump (dump_file
, NULL
, 0);
2906 FOR_EACH_LOOP (loop
, 0)
2908 predicate loop_iterations
= true;
2912 class tree_niter_desc niter_desc
;
2913 if (!loop
->header
->aux
)
2916 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2917 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2919 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2920 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
2921 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2922 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2923 && !is_gimple_min_invariant (niter_desc
.niter
))
2925 predicate will_be_nonconstant
2926 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2930 if (will_be_nonconstant
!= true)
2931 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2932 if (will_be_nonconstant
!= true
2933 && will_be_nonconstant
!= false)
2934 loop_iterations
&= will_be_nonconstant
;
2936 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
2937 phdr_freq
, max_loop_predicates
);
2940 /* To avoid quadratic behavior we analyze stride predicates only
2941 with respect to the containing loop. Thus we simply iterate
2942 over all defs in the outermost loop body. */
2943 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2944 loop
!= NULL
; loop
= loop
->next
)
2946 predicate loop_stride
= true;
2947 basic_block
*body
= get_loop_body (loop
);
2948 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2949 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2950 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2952 gimple_stmt_iterator gsi
;
2956 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2957 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2960 gimple
*stmt
= gsi_stmt (gsi
);
2962 if (!is_gimple_assign (stmt
))
2965 tree def
= gimple_assign_lhs (stmt
);
2966 if (TREE_CODE (def
) != SSA_NAME
)
2970 if (!simple_iv (loop_containing_stmt (stmt
),
2971 loop_containing_stmt (stmt
),
2973 || is_gimple_min_invariant (iv
.step
))
2976 predicate will_be_nonconstant
2977 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2981 if (will_be_nonconstant
!= true)
2982 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2983 if (will_be_nonconstant
!= true
2984 && will_be_nonconstant
!= false)
2985 loop_stride
= loop_stride
& will_be_nonconstant
;
2988 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
2989 phdr_freq
, max_loop_predicates
);
2994 FOR_ALL_BB_FN (bb
, my_function
)
3000 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
3002 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3005 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
3009 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3010 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3012 ss
->self_size
= size
;
3013 nonconstant_names
.release ();
3014 ipa_release_body_info (&fbi
);
3015 if (opt_for_fn (node
->decl
, optimize
))
3018 loop_optimizer_finalize ();
3019 else if (!ipa_edge_args_sum
)
3020 ipa_free_all_node_params ();
3021 free_dominance_info (CDI_DOMINATORS
);
3022 free_dominance_info (CDI_POST_DOMINATORS
);
3026 fprintf (dump_file
, "\n");
3027 ipa_dump_fn_summary (dump_file
, node
);
3032 /* Compute function summary.
3033 EARLY is true when we compute parameters during early opts. */
3036 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3038 HOST_WIDE_INT self_stack_size
;
3039 struct cgraph_edge
*e
;
3041 gcc_assert (!node
->inlined_to
);
3043 if (!ipa_fn_summaries
)
3044 ipa_fn_summary_alloc ();
3046 /* Create a new ipa_fn_summary. */
3047 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3048 ipa_fn_summaries
->remove (node
);
3049 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3050 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3052 /* Estimate the stack size for the function if we're optimizing. */
3053 self_stack_size
= optimize
&& !node
->thunk
3054 ? estimated_stack_frame_size (node
) : 0;
3055 size_info
->estimated_self_stack_size
= self_stack_size
;
3056 info
->estimated_stack_size
= self_stack_size
;
3060 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3063 node
->can_change_signature
= false;
3064 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3065 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3066 info
->account_size_time (ipa_fn_summary::size_scale
3067 * opt_for_fn (node
->decl
,
3068 param_uninlined_function_thunk_insns
),
3069 opt_for_fn (node
->decl
,
3070 param_uninlined_function_thunk_time
), t
, t
);
3071 t
= predicate::not_inlined ();
3072 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3073 ipa_update_overall_fn_summary (node
);
3074 size_info
->self_size
= size_info
->size
;
3075 if (stdarg_p (TREE_TYPE (node
->decl
)))
3077 info
->inlinable
= false;
3078 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3081 info
->inlinable
= true;
3085 /* Even is_gimple_min_invariant rely on current_function_decl. */
3086 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3088 /* During IPA profile merging we may be called w/o virtual SSA form
3090 update_ssa (TODO_update_ssa_only_virtuals
);
3092 /* Can this function be inlined at all? */
3093 if (!opt_for_fn (node
->decl
, optimize
)
3094 && !lookup_attribute ("always_inline",
3095 DECL_ATTRIBUTES (node
->decl
)))
3096 info
->inlinable
= false;
3098 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3100 /* Type attributes can use parameter indices to describe them. */
3101 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
3102 /* Likewise for #pragma omp declare simd functions or functions
3103 with simd attribute. */
3104 || lookup_attribute ("omp declare simd",
3105 DECL_ATTRIBUTES (node
->decl
)))
3106 node
->can_change_signature
= false;
3109 /* Otherwise, inlinable functions always can change signature. */
3110 if (info
->inlinable
)
3111 node
->can_change_signature
= true;
3114 /* Functions calling builtin_apply cannot change signature. */
3115 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3117 tree
cdecl = e
->callee
->decl
;
3118 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
3119 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
3122 node
->can_change_signature
= !e
;
3125 analyze_function_body (node
, early
);
3129 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3130 size_info
->size
= size_info
->self_size
;
3131 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3133 /* Code above should compute exactly the same result as
3134 ipa_update_overall_fn_summary but because computation happens in
3135 different order the roundoff errors result in slight changes. */
3136 ipa_update_overall_fn_summary (node
);
3137 /* In LTO mode we may have speculative edges set. */
3138 gcc_assert (in_lto_p
|| size_info
->size
== size_info
->self_size
);
3142 /* Compute parameters of functions used by inliner using
3143 current_function_decl. */
3146 compute_fn_summary_for_current (void)
3148 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3152 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3153 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3154 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3157 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3158 int *size
, int *time
,
3159 ipa_call_arg_values
*avals
)
3162 struct cgraph_node
*callee
;
3163 class ipa_fn_summary
*isummary
;
3164 enum availability avail
;
3168 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3170 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3173 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3174 if (!target
|| speculative
)
3177 /* Account for difference in cost between indirect and direct calls. */
3178 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3179 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3180 gcc_checking_assert (*time
>= 0);
3181 gcc_checking_assert (*size
>= 0);
3183 callee
= cgraph_node::get (target
);
3184 if (!callee
|| !callee
->definition
)
3186 callee
= callee
->function_symbol (&avail
);
3187 if (avail
< AVAIL_AVAILABLE
)
3189 isummary
= ipa_fn_summaries
->get (callee
);
3190 if (isummary
== NULL
)
3193 return isummary
->inlinable
;
3196 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3197 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3198 devirtualized. AVALS, if non-NULL, describes the context of the call site
3199 as far as values of parameters are concerened. */
3202 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3203 sreal
*time
, ipa_call_arg_values
*avals
,
3206 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3207 int call_size
= es
->call_stmt_size
;
3208 int call_time
= es
->call_stmt_time
;
3211 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3212 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3213 *hints
|= INLINE_HINT_indirect_call
;
3214 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3217 *min_size
+= cur_size
;
3219 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3223 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3224 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3227 Helper for estimate_calls_size_and_time which does the same but
3228 (in most cases) faster. */
3231 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3232 int *min_size
, sreal
*time
,
3234 clause_t possible_truths
,
3235 ipa_call_arg_values
*avals
)
3237 struct cgraph_edge
*e
;
3238 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3240 if (!e
->inline_failed
)
3242 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3243 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3244 hints
, possible_truths
, avals
);
3248 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3250 /* Do not care about zero sized builtins. */
3251 if (!es
->call_stmt_size
)
3253 gcc_checking_assert (!es
->call_stmt_time
);
3257 || es
->predicate
->evaluate (possible_truths
))
3259 /* Predicates of calls shall not use NOT_CHANGED codes,
3260 so we do not need to compute probabilities. */
3261 estimate_edge_size_and_time (e
, size
,
3262 es
->predicate
? NULL
: min_size
,
3263 time
, avals
, hints
);
3266 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3268 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3270 || es
->predicate
->evaluate (possible_truths
))
3271 estimate_edge_size_and_time (e
, size
,
3272 es
->predicate
? NULL
: min_size
,
3273 time
, avals
, hints
);
3277 /* Populate sum->call_size_time_table for edges from NODE. */
3280 summarize_calls_size_and_time (struct cgraph_node
*node
,
3281 ipa_fn_summary
*sum
)
3283 struct cgraph_edge
*e
;
3284 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3286 if (!e
->inline_failed
)
3288 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3289 summarize_calls_size_and_time (e
->callee
, sum
);
3295 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3297 struct predicate pred
= true;
3298 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3301 pred
= *es
->predicate
;
3302 sum
->account_size_time (size
, time
, pred
, pred
, true);
3304 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3309 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3310 struct predicate pred
= true;
3311 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3314 pred
= *es
->predicate
;
3315 sum
->account_size_time (size
, time
, pred
, pred
, true);
3319 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3320 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3321 context of the call site. */
3324 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3325 int *min_size
, sreal
*time
,
3327 clause_t possible_truths
,
3328 ipa_call_arg_values
*avals
)
3330 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3331 bool use_table
= true;
3333 gcc_assert (node
->callees
|| node
->indirect_calls
);
3335 /* During early inlining we do not calculate info for very
3336 large functions and thus there is no need for producing
3338 if (!ipa_node_params_sum
)
3340 /* Do not calculate summaries for simple wrappers; it is waste
3342 else if (node
->callees
&& node
->indirect_calls
3343 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3345 /* If there is an indirect edge that may be optimized, we need
3346 to go the slow way. */
3347 else if (avals
&& hints
3348 && (avals
->m_known_vals
.length ()
3349 || avals
->m_known_contexts
.length ()
3350 || avals
->m_known_aggs
.length ()))
3352 class ipa_node_params
*params_summary
= IPA_NODE_REF (node
);
3353 unsigned int nargs
= params_summary
3354 ? ipa_get_param_count (params_summary
) : 0;
3356 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3358 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3359 && (avals
->safe_sval_at (i
)
3360 || (avals
->m_known_aggs
.length () > i
3361 && avals
->m_known_aggs
[i
].items
.length ())))
3363 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3364 && (avals
->m_known_contexts
.length () > i
3365 && !avals
->m_known_contexts
[i
].useless_p ()))
3370 /* Fast path is via the call size time table. */
3373 /* Build summary if it is absent. */
3374 if (!sum
->call_size_time_table
.length ())
3376 predicate true_pred
= true;
3377 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3378 summarize_calls_size_and_time (node
, sum
);
3381 int old_size
= *size
;
3382 sreal old_time
= time
? *time
: 0;
3385 *min_size
+= sum
->call_size_time_table
[0].size
;
3390 /* Walk the table and account sizes and times. */
3391 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3393 if (e
->exec_predicate
.evaluate (possible_truths
))
3400 /* Be careful and see if both methods agree. */
3401 if ((flag_checking
|| dump_file
)
3402 /* Do not try to sanity check when we know we lost some
3404 && sum
->call_size_time_table
.length ()
3405 < ipa_fn_summary::max_size_time_table_size
)
3407 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3408 possible_truths
, avals
);
3409 gcc_assert (*size
== old_size
);
3410 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3412 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3413 old_time
.to_double (),
3414 time
->to_double ());
3417 /* Slow path by walking all edges. */
3419 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3420 possible_truths
, avals
);
3423 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3424 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3427 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3428 clause_t nonspec_possible_truths
,
3429 vec
<inline_param_summary
>
3430 inline_param_summary
,
3431 ipa_auto_call_arg_values
*arg_values
)
3432 : m_node (node
), m_possible_truths (possible_truths
),
3433 m_nonspec_possible_truths (nonspec_possible_truths
),
3434 m_inline_param_summary (inline_param_summary
),
3435 m_avals (arg_values
)
3439 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3442 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3444 m_node
= ctx
.m_node
;
3445 m_possible_truths
= ctx
.m_possible_truths
;
3446 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3447 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3448 unsigned int nargs
= params_summary
3449 ? ipa_get_param_count (params_summary
) : 0;
3451 m_inline_param_summary
= vNULL
;
3452 /* Copy the info only if there is at least one useful entry. */
3453 if (ctx
.m_inline_param_summary
.exists ())
3455 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3457 for (unsigned int i
= 0; i
< n
; i
++)
3458 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3459 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3461 m_inline_param_summary
3462 = ctx
.m_inline_param_summary
.copy ();
3466 m_avals
.m_known_vals
= vNULL
;
3467 if (ctx
.m_avals
.m_known_vals
.exists ())
3469 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3471 for (unsigned int i
= 0; i
< n
; i
++)
3472 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3473 && ctx
.m_avals
.m_known_vals
[i
])
3475 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3480 m_avals
.m_known_contexts
= vNULL
;
3481 if (ctx
.m_avals
.m_known_contexts
.exists ())
3483 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3485 for (unsigned int i
= 0; i
< n
; i
++)
3486 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3487 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3489 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3494 m_avals
.m_known_aggs
= vNULL
;
3495 if (ctx
.m_avals
.m_known_aggs
.exists ())
3497 unsigned int n
= MIN (ctx
.m_avals
.m_known_aggs
.length (), nargs
);
3499 for (unsigned int i
= 0; i
< n
; i
++)
3500 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3501 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3503 m_avals
.m_known_aggs
3504 = ipa_copy_agg_values (ctx
.m_avals
.m_known_aggs
);
3509 m_avals
.m_known_value_ranges
= vNULL
;
3512 /* Release memory used by known_vals/contexts/aggs vectors. and
3513 inline_param_summary. */
3516 ipa_cached_call_context::release ()
3518 /* See if context is initialized at first place. */
3521 ipa_release_agg_values (m_avals
.m_known_aggs
, true);
3522 m_avals
.m_known_vals
.release ();
3523 m_avals
.m_known_contexts
.release ();
3524 m_inline_param_summary
.release ();
3527 /* Return true if CTX describes the same call context as THIS. */
3530 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3532 if (m_node
!= ctx
.m_node
3533 || m_possible_truths
!= ctx
.m_possible_truths
3534 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3537 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3538 unsigned int nargs
= params_summary
3539 ? ipa_get_param_count (params_summary
) : 0;
3541 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3543 for (unsigned int i
= 0; i
< nargs
; i
++)
3545 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3547 if (i
>= m_inline_param_summary
.length ()
3548 || m_inline_param_summary
[i
].useless_p ())
3550 if (i
< ctx
.m_inline_param_summary
.length ()
3551 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3555 if (i
>= ctx
.m_inline_param_summary
.length ()
3556 || ctx
.m_inline_param_summary
[i
].useless_p ())
3558 if (i
< m_inline_param_summary
.length ()
3559 && !m_inline_param_summary
[i
].useless_p ())
3563 if (!m_inline_param_summary
[i
].equal_to
3564 (ctx
.m_inline_param_summary
[i
]))
3568 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3570 for (unsigned int i
= 0; i
< nargs
; i
++)
3572 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3574 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3576 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3577 && ctx
.m_avals
.m_known_vals
[i
])
3581 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3582 || !ctx
.m_avals
.m_known_vals
[i
])
3584 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3588 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3592 if (m_avals
.m_known_contexts
.exists ()
3593 || ctx
.m_avals
.m_known_contexts
.exists ())
3595 for (unsigned int i
= 0; i
< nargs
; i
++)
3597 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3599 if (i
>= m_avals
.m_known_contexts
.length ()
3600 || m_avals
.m_known_contexts
[i
].useless_p ())
3602 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3603 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3607 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3608 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3610 if (i
< m_avals
.m_known_contexts
.length ()
3611 && !m_avals
.m_known_contexts
[i
].useless_p ())
3615 if (!m_avals
.m_known_contexts
[i
].equal_to
3616 (ctx
.m_avals
.m_known_contexts
[i
]))
3620 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3622 for (unsigned int i
= 0; i
< nargs
; i
++)
3624 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3626 if (i
>= m_avals
.m_known_aggs
.length ()
3627 || m_avals
.m_known_aggs
[i
].is_empty ())
3629 if (i
< ctx
.m_avals
.m_known_aggs
.length ()
3630 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3634 if (i
>= ctx
.m_avals
.m_known_aggs
.length ()
3635 || ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3637 if (i
< m_avals
.m_known_aggs
.length ()
3638 && !m_avals
.m_known_aggs
[i
].is_empty ())
3642 if (!m_avals
.m_known_aggs
[i
].equal_to (ctx
.m_avals
.m_known_aggs
[i
]))
3649 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3650 this context. Always compute size and min_size. Only compute time and
3651 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3655 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3656 bool est_times
, bool est_hints
)
3658 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3663 ipa_hints hints
= 0;
3664 sreal loops_with_known_iterations
= 0;
3665 sreal loops_with_known_strides
= 0;
3668 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3671 fprintf (dump_file
, " Estimating body: %s\n"
3672 " Known to be false: ", m_node
->dump_name ());
3674 for (i
= predicate::not_inlined_condition
;
3675 i
< (predicate::first_dynamic_condition
3676 + (int) vec_safe_length (info
->conds
)); i
++)
3677 if (!(m_possible_truths
& (1 << i
)))
3680 fprintf (dump_file
, ", ");
3682 dump_condition (dump_file
, info
->conds
, i
);
3686 if (m_node
->callees
|| m_node
->indirect_calls
)
3687 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3688 est_times
? &time
: NULL
,
3689 est_hints
? &hints
: NULL
, m_possible_truths
,
3692 sreal nonspecialized_time
= time
;
3694 min_size
+= info
->size_time_table
[0].size
;
3695 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3697 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3699 /* Because predicates are conservative, it can happen that nonconst is 1
3703 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3705 gcc_checking_assert (e
->time
>= 0);
3706 gcc_checking_assert (time
>= 0);
3708 /* We compute specialized size only because size of nonspecialized
3709 copy is context independent.
3711 The difference between nonspecialized execution and specialized is
3712 that nonspecialized is not going to have optimized out computations
3713 known to be constant in a specialized setting. */
3718 nonspecialized_time
+= e
->time
;
3721 else if (!m_inline_param_summary
.exists ())
3728 int prob
= e
->nonconst_predicate
.probability
3729 (info
->conds
, m_possible_truths
,
3730 m_inline_param_summary
);
3731 gcc_checking_assert (prob
>= 0);
3732 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3733 if (prob
== REG_BR_PROB_BASE
)
3736 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3738 gcc_checking_assert (time
>= 0);
3741 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3742 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3743 gcc_checking_assert (min_size
>= 0);
3744 gcc_checking_assert (size
>= 0);
3745 gcc_checking_assert (time
>= 0);
3746 /* nonspecialized_time should be always bigger than specialized time.
3747 Roundoff issues however may get into the way. */
3748 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3750 /* Roundoff issues may make specialized time bigger than nonspecialized
3751 time. We do not really want that to happen because some heuristics
3752 may get confused by seeing negative speedups. */
3753 if (time
> nonspecialized_time
)
3754 time
= nonspecialized_time
;
3759 hints
|= INLINE_HINT_in_scc
;
3760 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3761 hints
|= INLINE_HINT_declared_inline
;
3762 if (info
->builtin_constant_p_parms
.length ()
3763 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3764 hints
|= INLINE_HINT_builtin_constant_p
;
3766 ipa_freqcounting_predicate
*fcp
;
3767 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3768 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3770 hints
|= INLINE_HINT_loop_iterations
;
3771 loops_with_known_iterations
+= fcp
->freq
;
3773 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3775 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3776 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3778 hints
|= INLINE_HINT_loop_stride
;
3779 loops_with_known_strides
+= fcp
->freq
;
3781 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3784 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3785 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3787 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3789 fprintf (dump_file
, "\n size:%i", (int) size
);
3791 fprintf (dump_file
, " time:%f nonspec time:%f",
3792 time
.to_double (), nonspecialized_time
.to_double ());
3794 fprintf (dump_file
, " loops with known iterations:%f "
3795 "known strides:%f", loops_with_known_iterations
.to_double (),
3796 loops_with_known_strides
.to_double ());
3797 fprintf (dump_file
, "\n");
3801 estimates
->time
= time
;
3802 estimates
->nonspecialized_time
= nonspecialized_time
;
3804 estimates
->size
= size
;
3805 estimates
->min_size
= min_size
;
3807 estimates
->hints
= hints
;
3812 /* Estimate size and time needed to execute callee of EDGE assuming that
3813 parameters known to be constant at caller of EDGE are propagated.
3814 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3815 and types for parameters. */
3818 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3819 ipa_auto_call_arg_values
*avals
,
3820 ipa_call_estimates
*estimates
)
3822 clause_t clause
, nonspec_clause
;
3824 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
3826 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
3827 ctx
.estimate_size_and_time (estimates
);
3830 /* Return stack frame offset where frame of NODE is supposed to start inside
3831 of the function it is inlined to.
3832 Return 0 for functions that are not inlined. */
3835 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3837 HOST_WIDE_INT offset
= 0;
3838 if (!node
->inlined_to
)
3840 node
= node
->callers
->caller
;
3843 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3844 if (!node
->inlined_to
)
3846 node
= node
->callers
->caller
;
3851 /* Update summary information of inline clones after inlining.
3852 Compute peak stack usage. */
3855 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3857 struct cgraph_edge
*e
;
3859 ipa_propagate_frequency (node
);
3860 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3862 if (!e
->inline_failed
)
3863 inline_update_callee_summaries (e
->callee
, depth
);
3865 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3867 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3868 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3871 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3872 INLINED_EDGE has been inlined.
3874 When function A is inlined in B and A calls C with parameter that
3875 changes with probability PROB1 and C is known to be passthrough
3876 of argument if B that change with probability PROB2, the probability
3877 of change is now PROB1*PROB2. */
3880 remap_edge_params (struct cgraph_edge
*inlined_edge
,
3881 struct cgraph_edge
*edge
)
3883 if (ipa_node_params_sum
)
3886 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3889 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3890 class ipa_call_summary
*inlined_es
3891 = ipa_call_summaries
->get (inlined_edge
);
3893 if (es
->param
.length () == 0)
3896 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3898 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3899 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3900 || jfunc
->type
== IPA_JF_ANCESTOR
)
3902 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3903 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3904 : ipa_get_jf_ancestor_formal_id (jfunc
);
3905 if (id
< (int) inlined_es
->param
.length ())
3907 int prob1
= es
->param
[i
].change_prob
;
3908 int prob2
= inlined_es
->param
[id
].change_prob
;
3909 int prob
= combine_probabilities (prob1
, prob2
);
3911 if (prob1
&& prob2
&& !prob
)
3914 es
->param
[i
].change_prob
= prob
;
3917 ->param
[id
].points_to_local_or_readonly_memory
)
3918 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3920 if (!es
->param
[i
].points_to_local_or_readonly_memory
3921 && jfunc
->type
== IPA_JF_CONST
3922 && points_to_local_or_readonly_memory_p
3923 (ipa_get_jf_constant (jfunc
)))
3924 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3930 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3932 Remap predicates of callees of NODE. Rest of arguments match
3935 Also update change probabilities. */
3938 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
3939 struct cgraph_node
*node
,
3940 class ipa_fn_summary
*info
,
3941 class ipa_node_params
*params_summary
,
3942 class ipa_fn_summary
*callee_info
,
3943 vec
<int> operand_map
,
3944 vec
<HOST_WIDE_INT
> offset_map
,
3945 clause_t possible_truths
,
3946 predicate
*toplev_predicate
)
3948 struct cgraph_edge
*e
, *next
;
3949 for (e
= node
->callees
; e
; e
= next
)
3952 next
= e
->next_callee
;
3954 if (e
->inline_failed
)
3956 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3957 remap_edge_params (inlined_edge
, e
);
3961 p
= es
->predicate
->remap_after_inlining
3962 (info
, params_summary
,
3963 callee_info
, operand_map
,
3964 offset_map
, possible_truths
,
3966 edge_set_predicate (e
, &p
);
3969 edge_set_predicate (e
, toplev_predicate
);
3972 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
3973 params_summary
, callee_info
,
3974 operand_map
, offset_map
, possible_truths
,
3977 for (e
= node
->indirect_calls
; e
; e
= next
)
3979 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3981 next
= e
->next_callee
;
3983 remap_edge_params (inlined_edge
, e
);
3986 p
= es
->predicate
->remap_after_inlining
3987 (info
, params_summary
,
3988 callee_info
, operand_map
, offset_map
,
3989 possible_truths
, *toplev_predicate
);
3990 edge_set_predicate (e
, &p
);
3993 edge_set_predicate (e
, toplev_predicate
);
3997 /* Run remap_after_inlining on each predicate in V. */
4000 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4001 class ipa_node_params
*params_summary
,
4002 class ipa_fn_summary
*callee_info
,
4003 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4004 vec
<int> operand_map
,
4005 vec
<HOST_WIDE_INT
> offset_map
,
4006 clause_t possible_truths
,
4007 predicate
*toplev_predicate
)
4010 ipa_freqcounting_predicate
*fcp
;
4011 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4014 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4015 callee_info
, operand_map
,
4016 offset_map
, possible_truths
,
4018 if (p
!= false && p
!= true)
4019 *fcp
->predicate
&= p
;
4023 /* We inlined EDGE. Update summary of the function we inlined into. */
4026 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4028 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4029 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4030 ? edge
->caller
->inlined_to
: edge
->caller
);
4031 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4032 clause_t clause
= 0; /* not_inline is known to be false. */
4034 auto_vec
<int, 8> operand_map
;
4035 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4037 predicate toplev_predicate
;
4038 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4039 class ipa_node_params
*params_summary
= (ipa_node_params_sum
4040 ? IPA_NODE_REF (to
) : NULL
);
4043 toplev_predicate
= *es
->predicate
;
4045 toplev_predicate
= true;
4047 info
->fp_expressions
|= callee_info
->fp_expressions
;
4049 if (callee_info
->conds
)
4051 ipa_auto_call_arg_values avals
;
4052 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4054 if (ipa_node_params_sum
&& callee_info
->conds
)
4056 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
4057 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4062 operand_map
.safe_grow_cleared (count
, true);
4063 offset_map
.safe_grow_cleared (count
, true);
4065 for (i
= 0; i
< count
; i
++)
4067 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4070 /* TODO: handle non-NOPs when merging. */
4071 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4073 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4074 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4075 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4078 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4080 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4081 if (offset
>= 0 && offset
< INT_MAX
)
4083 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4084 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4086 offset_map
[i
] = offset
;
4089 operand_map
[i
] = map
;
4090 gcc_assert (map
< ipa_get_param_count (params_summary
));
4094 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4095 if (ip
< count
&& operand_map
[ip
] >= 0)
4096 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4098 sreal freq
= edge
->sreal_frequency ();
4099 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4102 p
= e
->exec_predicate
.remap_after_inlining
4103 (info
, params_summary
,
4104 callee_info
, operand_map
,
4107 predicate nonconstp
;
4108 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4109 (info
, params_summary
,
4110 callee_info
, operand_map
,
4113 if (p
!= false && nonconstp
!= false)
4115 sreal add_time
= ((sreal
)e
->time
* freq
);
4116 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4118 if (prob
!= REG_BR_PROB_BASE
)
4119 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4120 if (prob
!= REG_BR_PROB_BASE
4121 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4123 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4124 (double) prob
/ REG_BR_PROB_BASE
);
4126 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4129 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4130 callee_info
, operand_map
,
4131 offset_map
, clause
, &toplev_predicate
);
4132 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4133 info
->loop_iterations
, operand_map
,
4134 offset_map
, clause
, &toplev_predicate
);
4135 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4136 info
->loop_strides
, operand_map
,
4137 offset_map
, clause
, &toplev_predicate
);
4139 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4140 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4142 if (info
->estimated_stack_size
< peak
)
4143 info
->estimated_stack_size
= peak
;
4145 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4146 if (info
->call_size_time_table
.length ())
4149 sreal edge_time
= 0;
4151 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4152 /* Unaccount size and time of the optimized out call. */
4153 info
->account_size_time (-edge_size
, -edge_time
,
4154 es
->predicate
? *es
->predicate
: true,
4155 es
->predicate
? *es
->predicate
: true,
4157 /* Account new calls. */
4158 summarize_calls_size_and_time (edge
->callee
, info
);
4161 /* Free summaries that are not maintained for inline clones/edges. */
4162 ipa_call_summaries
->remove (edge
);
4163 ipa_fn_summaries
->remove (edge
->callee
);
4164 ipa_remove_from_growth_caches (edge
);
4167 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4168 overall size and time. Recompute it.
4169 If RESET is true also recompute call_time_size_table. */
4172 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4174 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4175 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4179 size_info
->size
= 0;
4181 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4183 size_info
->size
+= e
->size
;
4184 info
->time
+= e
->time
;
4186 info
->min_size
= info
->size_time_table
[0].size
;
4188 info
->call_size_time_table
.release ();
4189 if (node
->callees
|| node
->indirect_calls
)
4190 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4192 ~(clause_t
) (1 << predicate::false_condition
),
4194 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4195 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4199 /* This function performs intraprocedural analysis in NODE that is required to
4200 inline indirect calls. */
4203 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4205 ipa_analyze_node (node
);
4206 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4208 ipa_print_node_params (dump_file
, node
);
4209 ipa_print_node_jump_functions (dump_file
, node
);
4214 /* Note function body size. */
4217 inline_analyze_function (struct cgraph_node
*node
)
4219 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4222 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4223 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4224 inline_indirect_intraprocedural_analysis (node
);
4225 compute_fn_summary (node
, false);
4228 struct cgraph_edge
*e
;
4229 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4230 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4231 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4232 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4239 /* Called when new function is inserted to callgraph late. */
4242 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4244 inline_analyze_function (node
);
4247 /* Note function body size. */
4250 ipa_fn_summary_generate (void)
4252 struct cgraph_node
*node
;
4254 FOR_EACH_DEFINED_FUNCTION (node
)
4255 if (DECL_STRUCT_FUNCTION (node
->decl
))
4256 node
->versionable
= tree_versionable_function_p (node
->decl
);
4258 ipa_fn_summary_alloc ();
4260 ipa_fn_summaries
->enable_insertion_hook ();
4262 ipa_register_cgraph_hooks ();
4264 FOR_EACH_DEFINED_FUNCTION (node
)
4266 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4267 || opt_for_fn (node
->decl
, optimize
)))
4268 inline_analyze_function (node
);
4272 /* Write inline summary for edge E to OB. */
4275 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4278 class ipa_call_summary
*es
= prevails
4279 ? ipa_call_summaries
->get_create (e
) : NULL
;
4283 int size
= streamer_read_uhwi (ib
);
4284 int time
= streamer_read_uhwi (ib
);
4285 int depth
= streamer_read_uhwi (ib
);
4289 es
->call_stmt_size
= size
;
4290 es
->call_stmt_time
= time
;
4291 es
->loop_depth
= depth
;
4294 bitpack_d bp
= streamer_read_bitpack (ib
);
4296 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4298 bp_unpack_value (&bp
, 1);
4302 edge_set_predicate (e
, &p
);
4303 length
= streamer_read_uhwi (ib
);
4305 && (e
->possibly_call_in_translation_unit_p ()
4306 /* Also stream in jump functions to builtins in hope that they
4307 will get fnspecs. */
4308 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4310 es
->param
.safe_grow_cleared (length
, true);
4311 for (i
= 0; i
< length
; i
++)
4313 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4314 es
->param
[i
].points_to_local_or_readonly_memory
4315 = streamer_read_uhwi (ib
);
4320 for (i
= 0; i
< length
; i
++)
4322 streamer_read_uhwi (ib
);
4323 streamer_read_uhwi (ib
);
4329 /* Stream in inline summaries from the section. */
4332 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4335 const struct lto_function_header
*header
=
4336 (const struct lto_function_header
*) data
;
4337 const int cfg_offset
= sizeof (struct lto_function_header
);
4338 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4339 const int string_offset
= main_offset
+ header
->main_size
;
4340 class data_in
*data_in
;
4341 unsigned int i
, count2
, j
;
4342 unsigned int f_count
;
4344 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4345 file_data
->mode_table
);
4348 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4349 header
->string_size
, vNULL
);
4350 f_count
= streamer_read_uhwi (&ib
);
4351 for (i
= 0; i
< f_count
; i
++)
4354 struct cgraph_node
*node
;
4355 class ipa_fn_summary
*info
;
4356 class ipa_node_params
*params_summary
;
4357 class ipa_size_summary
*size_info
;
4358 lto_symtab_encoder_t encoder
;
4359 struct bitpack_d bp
;
4360 struct cgraph_edge
*e
;
4363 index
= streamer_read_uhwi (&ib
);
4364 encoder
= file_data
->symtab_node_encoder
;
4365 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4367 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4368 params_summary
= node
->prevailing_p () ? IPA_NODE_REF (node
) : NULL
;
4369 size_info
= node
->prevailing_p ()
4370 ? ipa_size_summaries
->get_create (node
) : NULL
;
4372 int stack_size
= streamer_read_uhwi (&ib
);
4373 int size
= streamer_read_uhwi (&ib
);
4374 sreal time
= sreal::stream_in (&ib
);
4378 info
->estimated_stack_size
4379 = size_info
->estimated_self_stack_size
= stack_size
;
4380 size_info
->size
= size_info
->self_size
= size
;
4384 bp
= streamer_read_bitpack (&ib
);
4387 info
->inlinable
= bp_unpack_value (&bp
, 1);
4388 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4392 bp_unpack_value (&bp
, 1);
4393 bp_unpack_value (&bp
, 1);
4396 count2
= streamer_read_uhwi (&ib
);
4397 gcc_assert (!info
|| !info
->conds
);
4399 vec_safe_reserve_exact (info
->conds
, count2
);
4400 for (j
= 0; j
< count2
; j
++)
4403 unsigned int k
, count3
;
4404 c
.operand_num
= streamer_read_uhwi (&ib
);
4405 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4406 c
.type
= stream_read_tree (&ib
, data_in
);
4407 c
.val
= stream_read_tree (&ib
, data_in
);
4408 bp
= streamer_read_bitpack (&ib
);
4409 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4410 c
.by_ref
= bp_unpack_value (&bp
, 1);
4412 c
.offset
= streamer_read_uhwi (&ib
);
4413 count3
= streamer_read_uhwi (&ib
);
4416 vec_safe_reserve_exact (c
.param_ops
, count3
);
4418 ipa_set_param_used_by_ipa_predicates
4419 (params_summary
, c
.operand_num
, true);
4420 for (k
= 0; k
< count3
; k
++)
4422 struct expr_eval_op op
;
4423 enum gimple_rhs_class rhs_class
;
4424 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4425 op
.type
= stream_read_tree (&ib
, data_in
);
4426 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4428 case GIMPLE_UNARY_RHS
:
4430 op
.val
[0] = NULL_TREE
;
4431 op
.val
[1] = NULL_TREE
;
4434 case GIMPLE_BINARY_RHS
:
4435 case GIMPLE_TERNARY_RHS
:
4436 bp
= streamer_read_bitpack (&ib
);
4437 op
.index
= bp_unpack_value (&bp
, 2);
4438 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4439 if (rhs_class
== GIMPLE_BINARY_RHS
)
4440 op
.val
[1] = NULL_TREE
;
4442 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4446 fatal_error (UNKNOWN_LOCATION
,
4447 "invalid fnsummary in LTO stream");
4450 c
.param_ops
->quick_push (op
);
4453 info
->conds
->quick_push (c
);
4455 count2
= streamer_read_uhwi (&ib
);
4456 gcc_assert (!info
|| !info
->size_time_table
.length ());
4458 info
->size_time_table
.reserve_exact (count2
);
4459 for (j
= 0; j
< count2
; j
++)
4461 class size_time_entry e
;
4463 e
.size
= streamer_read_uhwi (&ib
);
4464 e
.time
= sreal::stream_in (&ib
);
4465 e
.exec_predicate
.stream_in (&ib
);
4466 e
.nonconst_predicate
.stream_in (&ib
);
4469 info
->size_time_table
.quick_push (e
);
4472 count2
= streamer_read_uhwi (&ib
);
4473 for (j
= 0; j
< count2
; j
++)
4476 sreal fcp_freq
= sreal::stream_in (&ib
);
4479 ipa_freqcounting_predicate fcp
;
4480 fcp
.predicate
= NULL
;
4481 set_hint_predicate (&fcp
.predicate
, p
);
4482 fcp
.freq
= fcp_freq
;
4483 vec_safe_push (info
->loop_iterations
, fcp
);
4486 count2
= streamer_read_uhwi (&ib
);
4487 for (j
= 0; j
< count2
; j
++)
4490 sreal fcp_freq
= sreal::stream_in (&ib
);
4493 ipa_freqcounting_predicate fcp
;
4494 fcp
.predicate
= NULL
;
4495 set_hint_predicate (&fcp
.predicate
, p
);
4496 fcp
.freq
= fcp_freq
;
4497 vec_safe_push (info
->loop_strides
, fcp
);
4500 count2
= streamer_read_uhwi (&ib
);
4502 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4503 for (j
= 0; j
< count2
; j
++)
4505 int parm
= streamer_read_uhwi (&ib
);
4507 info
->builtin_constant_p_parms
.quick_push (parm
);
4509 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4510 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4511 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4512 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4515 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4517 lto_data_in_delete (data_in
);
4521 /* Read inline summary. Jump functions are shared among ipa-cp
4522 and inliner, so when ipa-cp is active, we don't need to write them
4526 ipa_fn_summary_read (void)
4528 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4529 struct lto_file_decl_data
*file_data
;
4532 ipa_prop_read_jump_functions ();
4533 ipa_fn_summary_alloc ();
4535 while ((file_data
= file_data_vec
[j
++]))
4539 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4542 inline_read_section (file_data
, data
, len
);
4544 /* Fatal error here. We do not want to support compiling ltrans units
4545 with different version of compiler or different flags than the WPA
4546 unit, so this should never happen. */
4547 fatal_error (input_location
,
4548 "ipa inline summary is missing in input file");
4550 ipa_register_cgraph_hooks ();
4552 gcc_assert (ipa_fn_summaries
);
4553 ipa_fn_summaries
->enable_insertion_hook ();
4557 /* Write inline summary for edge E to OB. */
4560 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4562 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4565 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4566 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4567 streamer_write_uhwi (ob
, es
->loop_depth
);
4569 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4570 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4571 streamer_write_bitpack (&bp
);
4574 es
->predicate
->stream_out (ob
);
4576 streamer_write_uhwi (ob
, 0);
4577 streamer_write_uhwi (ob
, es
->param
.length ());
4578 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4580 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4581 streamer_write_uhwi (ob
, es
->param
[i
].points_to_local_or_readonly_memory
);
4586 /* Write inline summary for node in SET.
4587 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4588 active, we don't need to write them twice. */
4591 ipa_fn_summary_write (void)
4593 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4594 lto_symtab_encoder_iterator lsei
;
4595 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4596 unsigned int count
= 0;
4598 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4599 lsei_next_function_in_partition (&lsei
))
4601 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4602 if (cnode
->definition
&& !cnode
->alias
)
4605 streamer_write_uhwi (ob
, count
);
4607 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4608 lsei_next_function_in_partition (&lsei
))
4610 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4611 if (cnode
->definition
&& !cnode
->alias
)
4613 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4614 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4615 struct bitpack_d bp
;
4616 struct cgraph_edge
*edge
;
4619 struct condition
*c
;
4621 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4622 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4623 streamer_write_hwi (ob
, size_info
->self_size
);
4624 info
->time
.stream_out (ob
);
4625 bp
= bitpack_create (ob
->main_stream
);
4626 bp_pack_value (&bp
, info
->inlinable
, 1);
4627 bp_pack_value (&bp
, false, 1);
4628 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4629 streamer_write_bitpack (&bp
);
4630 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4631 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4634 struct expr_eval_op
*op
;
4636 streamer_write_uhwi (ob
, c
->operand_num
);
4637 streamer_write_uhwi (ob
, c
->code
);
4638 stream_write_tree (ob
, c
->type
, true);
4639 stream_write_tree (ob
, c
->val
, true);
4640 bp
= bitpack_create (ob
->main_stream
);
4641 bp_pack_value (&bp
, c
->agg_contents
, 1);
4642 bp_pack_value (&bp
, c
->by_ref
, 1);
4643 streamer_write_bitpack (&bp
);
4644 if (c
->agg_contents
)
4645 streamer_write_uhwi (ob
, c
->offset
);
4646 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4647 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4649 streamer_write_uhwi (ob
, op
->code
);
4650 stream_write_tree (ob
, op
->type
, true);
4653 bp
= bitpack_create (ob
->main_stream
);
4654 bp_pack_value (&bp
, op
->index
, 2);
4655 streamer_write_bitpack (&bp
);
4656 stream_write_tree (ob
, op
->val
[0], true);
4658 stream_write_tree (ob
, op
->val
[1], true);
4662 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4663 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4665 streamer_write_uhwi (ob
, e
->size
);
4666 e
->time
.stream_out (ob
);
4667 e
->exec_predicate
.stream_out (ob
);
4668 e
->nonconst_predicate
.stream_out (ob
);
4670 ipa_freqcounting_predicate
*fcp
;
4671 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4672 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4674 fcp
->predicate
->stream_out (ob
);
4675 fcp
->freq
.stream_out (ob
);
4677 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4678 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4680 fcp
->predicate
->stream_out (ob
);
4681 fcp
->freq
.stream_out (ob
);
4683 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4685 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4687 streamer_write_uhwi (ob
, ip
);
4688 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4689 write_ipa_call_summary (ob
, edge
);
4690 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4691 write_ipa_call_summary (ob
, edge
);
4694 streamer_write_char_stream (ob
->main_stream
, 0);
4695 produce_asm (ob
, NULL
);
4696 destroy_output_block (ob
);
4698 ipa_prop_write_jump_functions ();
4702 /* Release function summary. */
4705 ipa_free_fn_summary (void)
4707 if (!ipa_call_summaries
)
4709 ggc_delete (ipa_fn_summaries
);
4710 ipa_fn_summaries
= NULL
;
4711 delete ipa_call_summaries
;
4712 ipa_call_summaries
= NULL
;
4713 edge_predicate_pool
.release ();
4714 /* During IPA this is one of largest datastructures to release. */
4719 /* Release function summary. */
4722 ipa_free_size_summary (void)
4724 if (!ipa_size_summaries
)
4726 delete ipa_size_summaries
;
4727 ipa_size_summaries
= NULL
;
4732 const pass_data pass_data_local_fn_summary
=
4734 GIMPLE_PASS
, /* type */
4735 "local-fnsummary", /* name */
4736 OPTGROUP_INLINE
, /* optinfo_flags */
4737 TV_INLINE_PARAMETERS
, /* tv_id */
4738 0, /* properties_required */
4739 0, /* properties_provided */
4740 0, /* properties_destroyed */
4741 0, /* todo_flags_start */
4742 0, /* todo_flags_finish */
4745 class pass_local_fn_summary
: public gimple_opt_pass
4748 pass_local_fn_summary (gcc::context
*ctxt
)
4749 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4752 /* opt_pass methods: */
4753 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
4754 virtual unsigned int execute (function
*)
4756 return compute_fn_summary_for_current ();
4759 }; // class pass_local_fn_summary
4764 make_pass_local_fn_summary (gcc::context
*ctxt
)
4766 return new pass_local_fn_summary (ctxt
);
/* Free inline summary.  */

/* Pass descriptor for the simple-IPA pass releasing function summaries.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
4787 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
4790 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4791 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
4795 /* opt_pass methods: */
4796 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
4797 void set_pass_param (unsigned int n
, bool param
)
4799 gcc_assert (n
== 0);
4802 virtual bool gate (function
*) { return true; }
4803 virtual unsigned int execute (function
*)
4805 ipa_free_fn_summary ();
4806 /* Free ipa-prop structures if they are no longer needed. */
4807 ipa_free_all_structures_after_iinln ();
4809 ipa_free_size_summary ();
4815 }; // class pass_ipa_free_fn_summary
4819 simple_ipa_opt_pass
*
4820 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4822 return new pass_ipa_free_fn_summary (ctxt
);
/* Pass descriptor for the IPA pass streaming/computing function summaries.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};
4840 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4843 pass_ipa_fn_summary (gcc::context
*ctxt
)
4844 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4845 ipa_fn_summary_generate
, /* generate_summary */
4846 ipa_fn_summary_write
, /* write_summary */
4847 ipa_fn_summary_read
, /* read_summary */
4848 NULL
, /* write_optimization_summary */
4849 NULL
, /* read_optimization_summary */
4850 NULL
, /* stmt_fixup */
4851 0, /* function_transform_todo_flags_start */
4852 NULL
, /* function_transform */
4853 NULL
) /* variable_transform */
4856 /* opt_pass methods: */
4857 virtual unsigned int execute (function
*) { return 0; }
4859 }; // class pass_ipa_fn_summary
4864 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4866 return new pass_ipa_fn_summary (ctxt
);
4869 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4870 within the same process. For use by toplev::finalize. */
4873 ipa_fnsummary_c_finalize (void)
4875 ipa_free_fn_summary ();