/* Function summary pass.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Analysis of function bodies used by inter-procedural passes

   We estimate for each function
     - function body size and size after specializing into given context
     - average function execution time in a given context
     - call statement size, time and how often the parameters change

   ipa_fn_summary data structures store above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the ipa_fn_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depends on context (i.e. it is known to be optimized away when
   context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size_and_time can be used to query function size/time in
   a given context.  ipa_merge_fn_summary_after_inlining merges properties
   of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
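/* As an illustrative sketch (not code from this pass): for a function

     int f (int x) { if (x) expensive (); return 0; }

   the size/time table conceptually contains one entry accounted
   unconditionally (the entry/exit code) and one entry guarded by the
   predicate "param 0 not known to be 0", so a clone or inline copy
   specialized for x == 0 drops the guarded entry from its size and time
   estimates.  */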
#define INCLUDE_VECTOR
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "tree-streamer.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cfgexpand.h"
#include "stringpool.h"
#include "tree-into-ssa.h"
#include "symtab-clones.h"
#include "gimple-range.h"
fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
/* Edge predicates go here.  */
static object_allocator<ipa_predicate> edge_predicate_pool ("edge predicates");
/* Dump IPA hints.  */
void
ipa_dump_hints (FILE *f, ipa_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "IPA hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  if (hints & INLINE_HINT_builtin_constant_p)
    {
      hints &= ~INLINE_HINT_builtin_constant_p;
      fprintf (f, " builtin_constant_p");
    }
  gcc_assert (!hints);
}
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.
   If CALL is true account to call_size_time_table rather than
   size_time_table.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const ipa_predicate &exec_pred,
				   const ipa_predicate &nonconst_pred_in,
				   bool call)
{
  size_time_entry *e;
  bool found = false;
  int i;
  ipa_predicate nonconst_pred;
  vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;

  if (exec_pred == false)
    return;

  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && table->length ())
    return;

  /* Only for calls we are unaccounting what we previously recorded.  */
  gcc_checking_assert (time >= 0 || call);

  for (i = 0; table->iterate (i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == max_size_time_table_size)
    {
      i = 0;
      found = true;
      e = &(*table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      if (call)
	call_size_time_table.safe_push (new_entry);
      else
	size_time_table.safe_push (new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
      /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
      /* Tolerate small roundoff issues.  */
      if (e->time < 0)
	e->time = 0;
    }
}
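/* Illustrative usage (hypothetical values): calling
   account_size_time (2 * size_scale, 1, pred, pred) twice with the same
   predicate PRED leaves a single table entry of size 4 * size_scale and
   time 2, because entries with identical exec/nonconst predicates are
   merged rather than duplicated.  */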
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target
    = cgraph_node::get_create (builtin_decl_unreachable ());

  if (e->speculative)
    e = cgraph_edge::resolve_speculation (e, target->decl);
  else if (!e->callee)
    e = cgraph_edge::make_direct (e, target);
  else
    e->redirect_callee (target);
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, ipa_predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
     be optimized out.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
	 just once.  Do it always on the direct edge, so we do not
	 attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  class ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
	es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
	edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}
/* Set predicate for hint *P.  */

static void
set_hint_predicate (ipa_predicate **p, ipa_predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
	edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
	*p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}
/* Find if NEW_PREDICATE is already in V and if so, increment its freq.
   Otherwise add a new item to the vector with this predicate and freq equal
   to ADD_FREQ, unless the number of predicates would exceed MAX_NUM_PREDICATES
   in which case the function does nothing.  */

static void
add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
			    const ipa_predicate &new_predicate, sreal add_freq,
			    unsigned max_num_predicates)
{
  if (new_predicate == false || new_predicate == true)
    return;
  ipa_freqcounting_predicate *f;
  for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
    if (new_predicate == f->predicate)
      {
	f->freq += add_freq;
	return;
      }
  if (vec_safe_length (*v) >= max_num_predicates)
    /* Too many different predicates to account for.  */
    return;

  ipa_freqcounting_predicate fcp;
  fcp.predicate = NULL;
  set_hint_predicate (&fcp.predicate, new_predicate);
  fcp.freq = add_freq;
  vec_safe_push (*v, fcp);
}
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  A
   zero bit means that the condition is known to be false, while a set bit
   means that the condition may or may not be true.  The two differ - for
   example the NOT_INLINED condition is always false in the second, and also
   builtin_constant_p tests cannot use the fact that a parameter is indeed
   a constant.

   When INLINE_P is true, assume that we are inlining.  AVALS contains known
   information about argument values.  The function does not modify its
   content and so AVALS could also be of type ipa_call_arg_values, but so far
   all callers work with the auto version and so we avoid the conversion for
   convenience.

   ERROR_MARK value of an argument means compile time invariant.  */
static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    ipa_auto_call_arg_values *avals,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val = NULL;
      tree res;
      int j;
      struct expr_eval_op *op;

      if (c->agg_contents)
	{
	  if (c->code == ipa_predicate::changed
	      && !c->by_ref
	      && (avals->safe_sval_at(c->operand_num) == error_mark_node))
	    continue;

	  if (tree sval = avals->safe_sval_at (c->operand_num))
	    val = ipa_find_agg_cst_from_init (sval, c->offset, c->by_ref);
	  if (!val)
	    {
	      ipa_argagg_value_list avs (avals);
	      val = avs.get_value (c->operand_num, c->offset / BITS_PER_UNIT,
				   c->by_ref);
	    }
	}
      else
	{
	  val = avals->safe_sval_at (c->operand_num);
	  if (val && val == error_mark_node
	      && c->code != ipa_predicate::changed)
	    val = NULL_TREE;
	}

      if (!val
	  && (c->code == ipa_predicate::changed
	      || c->code == ipa_predicate::is_not_constant))
	{
	  clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == ipa_predicate::changed)
	{
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}

      if (c->code == ipa_predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}

      if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
	{
	  if (c->type != TREE_TYPE (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);

	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (!val)
		break;
	      if (!op->val[0])
		val = fold_unary (op->code, op->type, val);
	      else if (!op->val[1])
		val = fold_binary (op->code, op->type,
				   op->index ? op->val[0] : val,
				   op->index ? val : op->val[0]);
	      else if (op->index == 0)
		val = fold_ternary (op->code, op->type,
				    val, op->val[0], op->val[1]);
	      else if (op->index == 1)
		val = fold_ternary (op->code, op->type,
				    op->val[0], val, op->val[1]);
	      else if (op->index == 2)
		val = fold_ternary (op->code, op->type,
				    op->val[0], op->val[1], val);
	      else
		val = NULL_TREE;
	    }

	  res = val
	    ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	    : NULL;

	  if (res && integer_zerop (res))
	    continue;
	  if (res && integer_onep (res))
	    {
	      clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	      nonspec_clause
		|= 1 << (i + ipa_predicate::first_dynamic_condition);
	      continue;
	    }
	}
      if (c->operand_num < (int) avals->m_known_value_ranges.length ()
	  && !c->agg_contents
	  && (!val || TREE_CODE (val) != INTEGER_CST))
	{
	  value_range vr = avals->m_known_value_ranges[c->operand_num];
	  if (!vr.undefined_p ()
	      && !vr.varying_p ()
	      && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
	    {
	      if (!useless_type_conversion_p (c->type, vr.type ()))
		range_cast (vr, c->type);

	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
		{
		  if (vr.varying_p () || vr.undefined_p ())
		    break;

		  value_range res;
		  if (!op->val[0])
		    {
		      range_op_handler handler (op->code, op->type);
		      if (!handler
			  || !res.supports_type_p (op->type)
			  || !handler.fold_range (res, op->type, vr,
						  value_range (op->type)))
			res.set_varying (op->type);
		    }
		  else if (!op->val[1])
		    {
		      value_range op0;
		      range_op_handler handler (op->code, op->type);

		      ipa_range_set_and_normalize (op0, op->val[0]);

		      if (!handler
			  || !res.supports_type_p (op->type)
			  || !handler.fold_range (res, op->type,
						  op->index ? op0 : vr,
						  op->index ? vr : op0))
			res.set_varying (op->type);
		    }
		  else
		    res.set_varying (op->type);
		  vr = res;
		}
	      if (!vr.varying_p () && !vr.undefined_p ())
		{
		  value_range res;
		  value_range val_vr;
		  range_op_handler handler (c->code, boolean_type_node);

		  ipa_range_set_and_normalize (val_vr, c->val);

		  if (!handler
		      || !res.supports_type_p (boolean_type_node)
		      || !handler.fold_range (res, boolean_type_node, vr, val_vr))
		    res.set_varying (boolean_type_node);

		  if (res.zero_p ())
		    continue;
		}
	    }
	}

      clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
/* Return true if VRP will be executed on the function.
   We do not want to anticipate optimizations that will not happen.

   FIXME: This can be confused with -fdisable and debug counters and thus
   it should not be used for correctness (only to make heuristics work).
   This means that inliner should do its own optimizations of expressions
   that it predicts to be constant so wrong code can not be triggered by
   builtin_constant_p.  */

static bool
vrp_will_run_p (struct cgraph_node *node)
{
  return (opt_for_fn (node->decl, optimize)
	  && !opt_for_fn (node->decl, optimize_debug)
	  && opt_for_fn (node->decl, flag_tree_vrp));
}
/* Similarly about FRE.  */

static bool
fre_will_run_p (struct cgraph_node *node)
{
  return (opt_for_fn (node->decl, optimize)
	  && !opt_for_fn (node->decl, optimize_debug)
	  && opt_for_fn (node->decl, flag_tree_fre));
}
/* Work out what conditions might be true at invocation of E.
   Compute costs for inlined edge if INLINE_P is true.

   Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
   (if non-NULL) conditions evaluated for nonspecialized clone called
   in a given context.

   Vectors in AVALS will be populated with useful known information about
   argument values - information not known to have any uses will be omitted -
   except for m_known_contexts which will only be calculated if
   COMPUTE_CONTEXTS is true.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      ipa_auto_call_arg_values *avals,
			      bool compute_contexts)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && (info->conds || compute_contexts)
      && (args = ipa_edge_args_sum->get (e)) != NULL)
    {
      struct cgraph_node *caller;
      class ipa_node_params *caller_parms_info, *callee_pi = NULL;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (count)
	{
	  if (e->caller->inlined_to)
	    caller = e->caller->inlined_to;
	  else
	    caller = e->caller;
	  caller_parms_info = ipa_node_params_sum->get (caller);
	  callee_pi = ipa_node_params_sum->get (callee);

	  /* Watch for thunks.  */
	  if (callee_pi)
	    /* Watch for variadic functions.  */
	    count = MIN (count, ipa_get_param_count (callee_pi));
	}

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);

	    if (ipa_is_param_used_by_indirect_call (callee_pi, i)
		|| ipa_is_param_used_by_ipa_predicates (callee_pi, i))
	      {
		/* Determine if we know constant value of the parameter.  */
		tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
						 ipa_get_type (callee_pi, i));

		if (!cst && e->call_stmt
		    && i < (int)gimple_call_num_args (e->call_stmt))
		  {
		    cst = gimple_call_arg (e->call_stmt, i);
		    if (!is_gimple_min_invariant (cst))
		      cst = NULL;
		  }
		if (cst)
		  {
		    gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		    if (!avals->m_known_vals.length ())
		      avals->m_known_vals.safe_grow_cleared (count, true);
		    avals->m_known_vals[i] = cst;
		  }
		else if (inline_p && !es->param[i].change_prob)
		  {
		    if (!avals->m_known_vals.length ())
		      avals->m_known_vals.safe_grow_cleared (count, true);
		    avals->m_known_vals[i] = error_mark_node;
		  }

		/* If we failed to get simple constant, try value range.  */
		if ((!cst || TREE_CODE (cst) != INTEGER_CST)
		    && vrp_will_run_p (caller)
		    && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
		  {
		    value_range vr
		      = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
						    ipa_get_type (callee_pi,
								  i));
		    if (!vr.undefined_p () && !vr.varying_p ())
		      {
			if (!avals->m_known_value_ranges.length ())
			  {
			    avals->m_known_value_ranges.safe_grow (count, true);
			    for (int i = 0; i < count; ++i)
			      new (&avals->m_known_value_ranges[i])
				value_range ();
			  }
			avals->m_known_value_ranges[i] = vr;
		      }
		  }

		/* Determine known aggregate values.  */
		if (fre_will_run_p (caller))
		  ipa_push_agg_values_from_jfunc (caller_parms_info,
						  caller, &jf->agg, i,
						  &avals->m_known_aggs);
	      }

	    /* For calls used in polymorphic calls we further determine
	       polymorphic call context.  */
	    if (compute_contexts
		&& ipa_is_param_used_by_polymorphic_call (callee_pi, i))
	      {
		ipa_polymorphic_call_context
		  ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		if (!ctx.useless_p ())
		  {
		    if (!avals->m_known_contexts.length ())
		      avals->m_known_contexts.safe_grow_cleared (count, true);
		    avals->m_known_contexts[i]
		      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		  }
	      }
	  }
      else
	gcc_assert (!count || callee->thunk);
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    {
	      if (!avals->m_known_vals.length ())
		avals->m_known_vals.safe_grow_cleared (count, true);
	      avals->m_known_vals[i] = cst;
	    }
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
				      nonspec_clause_ptr);
}
/* Allocate the function summary.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  ipa_size_summaries = new ipa_size_summary_t (symtab);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab);
}
ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);
}

ipa_fn_summary::~ipa_fn_summary ()
{
  unsigned len = vec_safe_length (loop_iterations);
  for (unsigned i = 0; i < len; i++)
    edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
  len = vec_safe_length (loop_strides);
  for (unsigned i = 0; i < len; i++)
    edge_predicate_pool.remove ((*loop_strides)[i].predicate);
  call_size_time_table.release ();
  vec_free (loop_iterations);
  vec_free (loop_strides);
  builtin_constant_p_parms.release ();
}
void
ipa_fn_summary_t::remove_callees (cgraph_node *node)
{
  cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
}
/* Duplicate predicates in loop hint vector, allocating memory for them,
   removing and deallocating any uninteresting (true or false) ones.  Return
   the result.  */

static vec<ipa_freqcounting_predicate, va_gc> *
remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
				    clause_t possible_truths)
{
  if (vec_safe_length (v) == 0)
    return NULL;

  vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
  int len = res->length();
  for (int i = len - 1; i >= 0; i--)
    {
      ipa_predicate new_predicate
	= (*res)[i].predicate->remap_after_duplication (possible_truths);
      /* We do not want to free previous predicate; it is used by node
	 origin.  */
      (*res)[i].predicate = NULL;
      set_hint_predicate (&(*res)[i].predicate, new_predicate);

      if (!(*res)[i].predicate)
	res->unordered_remove (i);
    }

  return res;
}
/* Hook that is called by cgraph.cc when a node is duplicated.  */

void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *src_info,
			     ipa_fn_summary *info)
{
  new (info) ipa_fn_summary (*src_info);
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  clone_info *cinfo = clone_info::get (dst);
  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && cinfo && cinfo->tree_map)
    {
      /* Use SRC parm info since it may not be copied yet.  */
      ipa_node_params *parms_info = ipa_node_params_sum->get (src);
      ipa_auto_call_arg_values avals;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      ipa_predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table.release ();
      avals.m_known_vals.safe_grow_cleared (count, true);
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  avals.m_known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  &avals,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);

      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as on optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
	{
	  ipa_predicate new_exec_pred;
	  ipa_predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
				 (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
				 (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.   */
      for (edge = dst->callees; edge; edge = next)
	{
	  ipa_predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.   */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  ipa_predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
			  (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size
	      += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      info->loop_iterations
	= remap_freqcounting_preds_after_dup (info->loop_iterations,
					      possible_truths);
      info->loop_strides
	= remap_freqcounting_preds_after_dup (info->loop_strides,
					      possible_truths);
      if (info->builtin_constant_p_parms.length())
	{
	  vec<int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
	  int ip;
	  info->builtin_constant_p_parms = vNULL;
	  for (i = 0; parms.iterate (i, &ip); i++)
	    if (!avals.m_known_vals[ip])
	      info->builtin_constant_p_parms.safe_push (ip);
	}

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = src_info->size_time_table.copy ();
      info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
      info->loop_strides = vec_safe_copy (info->loop_strides);
      info->builtin_constant_p_parms
	= info->builtin_constant_p_parms.copy ();

      ipa_freqcounting_predicate *f;
      for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
	{
	  ipa_predicate p = *f->predicate;
	  f->predicate = NULL;
	  set_hint_predicate (&f->predicate, p);
	}
      for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
	{
	  ipa_predicate p = *f->predicate;
	  f->predicate = NULL;
	  set_hint_predicate (&f->predicate, p);
	}
    }
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}
/* Hook that is called by cgraph.cc when a node is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       class ipa_call_summary *srcinfo,
			       class ipa_call_summary *info)
{
  new (info) ipa_call_summary (*srcinfo);
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}
/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       class ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s %s\n%*s  freq:%4.2f",
	       indent, "", callee->dump_name (),
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
	       indent, "", edge->sreal_frequency ().to_double ());

      if (cross_module_call_p (edge))
	fprintf (f, " cross module");

      if (es)
	fprintf (f, " loop depth:%2i size:%2i time: %2i",
		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      ipa_size_summary *ss = ipa_size_summaries->get (callee);
      if (s != NULL)
	fprintf (f, " callee size:%2i stack:%2i",
		 (int) (ss->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      if (es && es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      if (es && es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	    if (es->param[i].points_to_local_or_readonly_memory)
	      fprintf (f, "%*s op%i points to local or readonly memory\n",
		       indent + 2, "", i);
	  }
      if (!edge->inline_failed)
	{
	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
		   indent + 2, "",
		   (int) ipa_get_stack_frame_offset (callee),
		   (int) ss->estimated_self_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  if (s->builtin_constant_p_parms.length ())
	    {
	      fprintf (f, " builtin_constant_p_parms");
	      for (unsigned int i = 0;
		   i < s->builtin_constant_p_parms.length (); i++)
		fprintf (f, " %i", s->builtin_constant_p_parms[i]);
	    }
	  fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
	  fprintf (f, "  self size:       %i\n", ss->self_size);
	  fprintf (f, "  global size:     %i\n", ss->size);
	  fprintf (f, "  min size:       %i\n", s->min_size);
	  fprintf (f, "  self stack:      %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, "  estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, "  In SCC:  %i\n", (int) s->scc_no);
	  for (i = 0; s->size_time_table.iterate (i, &e); i++)
	    {
	      fprintf (f, "    size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  ipa_freqcounting_predicate *fcp;
	  bool first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, "  loop iterations:");
		  first_fcp = false;
		}
	      fprintf (f, "  %3.2f for ", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, "  loop strides:");
		  first_fcp = false;
		}
	      fprintf (f, "  %3.2f for :", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  fprintf (f, "  calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	  if (s->target_info)
	    fprintf (f, "  target_info: %x\n", s->target_info);
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}
/* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
   boolean variable pointed to by DATA.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
	       void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL
      && fbi->aa_walk_budget > 0)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget);
      if (walked < 0)
	{
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      fbi->aa_walk_budget -= walked;
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
/* If OP refers to value of function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
		 poly_int64 *size_p)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
			    gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
			    size_p);
  return NULL_TREE;
}
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle a testcase like
	       struct a {int a,b;};
	     returned by value, which translates into component stores to
	     the return slot.  For that we either need to copy ipa-split
	     logic detecting writes to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
/* Analyze EXPR if it represents a series of simple operations performed on
   a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
   AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
   Type of the parameter or load from an aggregate via the parameter is
   stored in *TYPE_P.  Operations on the parameter are recorded to
   PARAM_OPS_P if it is not NULL.  */

static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
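/* Illustrative example: for GIMPLE like

     _1 = x_2(D) + 4;
     _3 = _1 * 2;

   with parameter X, decompose_param_expr stores X's index to *INDEX_P and
   records the operation sequence "+ 4", "* 2" in *PARAM_OPS_P, so a
   condition on _3 can later be evaluated as (x + 4) * 2 once x is known.  */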
/* Record to SUMMARY that PARM is used by builtin_constant_p.  */

static void
add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
{
  int ip;

  /* Avoid duplicates.  */
  for (unsigned int i = 0;
       summary->builtin_constant_p_parms.iterate (i, &ip); i++)
    if (ip == parm)
      return;
  summary->builtin_constant_p_parms.safe_push (parm);
}
/* If BB ends by a conditional we can turn into predicates, attach corresponding
   predicates to the CFG edges.   */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!last)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      ipa_predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(ipa_predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;

  add_builtin_constant_p_parm (summary, index);
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      ipa_predicate p = add_condition (summary, params_summary, index,
				       param_type, &aggpos,
				       ipa_predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(ipa_predicate *) e->aux = p;
    }
}
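/* Illustrative example: for

     if (n != 0)
       foo ();
     else
       bar ();

   with parameter N, the true edge receives the predicate "n != 0" and the
   false edge "n == 0", letting specialized contexts where N is known drop
   one arm from the size/time estimate.  */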
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.   */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  gswitch *last = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb));
  if (!last)
    return;
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = opt_for_fn (fbi->node->decl,
				param_ipa_max_switch_predicate_bounds);
  int bound_count = 0;
  value_range vr;

  get_range_query (cfun)->range_of_expr (vr, op);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (op));
  tree vr_min, vr_max;
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  wide_int vr_wmin = wi::to_wide (vr_min);
  wide_int vr_wmax = wi::to_wide (vr_max);

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(ipa_predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      ipa_predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  ipa_predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(ipa_predicate *) e->aux
	= p.or_with (summary->conds, *(ipa_predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      *(ipa_predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  ipa_predicate p_seg = true;
  ipa_predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

		[D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

	      D2 < x < D3 && x != S1 && ... && x != Sn  */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(ipa_predicate *) e->aux
    = p_all.or_with (summary->conds, *(ipa_predicate *) e->aux);

  vec_free (param_ops);
}
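/* Illustrative example: for

     switch (x) { case 1: ...; case 3 ... 5: ...; default: ... }

   the edge for "case 1" gets the predicate "x == 1", the range case gets
   "x >= 3 && x <= 5", and the default edge gets a predicate built from the
   negated ranges, roughly (x != 1 && x < 3) || x > 5.  */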
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(ipa_predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  ipa_predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  ipa_predicate this_bb_predicate
		    = *(ipa_predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(ipa_predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((ipa_predicate *) bb->aux) = p;
		}
	      else if (p != *(ipa_predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(ipa_predicate *)bb->aux);
		  if (p != *(ipa_predicate *)bb->aux)
		    {
		      done = false;
		      *((ipa_predicate *)bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((ipa_predicate *)pdom_bb->aux) = p;
		}
	      else if (p != *(ipa_predicate *)pdom_bb->aux)
		{
		  p = p.or_with (summary->conds,
				 *(ipa_predicate *)pdom_bb->aux);
		  if (p != *(ipa_predicate *)pdom_bb->aux)
		    {
		      done = false;
		      *((ipa_predicate *)pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
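/* Sketch of the propagation above on a diamond CFG: the entry block is
   true, the two arms get the edge predicates "x == 0" and "x != 0"
   respectively, and the join block receives their OR, which the
   post-dominator shortcut collapses back to the predicate of the block
   containing the conditional.  */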
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static ipa_predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<ipa_predicate> nonconstant_names)
{
  tree parm;
  int index;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  ipa_predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      ipa_predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      ipa_predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      ipa_predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      ipa_predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
}
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static ipa_predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<ipa_predicate> nonconstant_names)
{
  ipa_predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  ipa_predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     ipa_predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       ipa_predicate::changed, NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
struct record_modified_bb_info
{
  tree op;
  bitmap bb_set;
  gimple *stmt;
};

/* Value is initialized in INIT_BB and used in USE_BB.  We want to estimate
   how often the value changes between executions of USE_BB.
   INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion and constant propagation, so a simple estimate is good
   enough.  */

static basic_block
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
  class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
  if (l && l->header->count < init_bb->count)
    return l->header;
  return init_bb;
}
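
/* Illustrative example (not from the original sources): when the value is
   set in one loop (INIT_BB->count == 10000) and used in a sibling loop
   (USE_BB->count == 10000) of a function entered 10 times, the common
   "loop" is the function body whose header count 10 is smaller than
   INIT_BB->count, so that header is returned; the caller then estimates
   the change probability from 10/10000 instead of the misleading
   10000/10000.  */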
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, "     Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
		 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
		  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  return false;
}
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     the probability of the value changing is (i.e. when the init statement
     is in a sibling loop of the call).

     We make a conservative estimate: when the call is executed N times more
     often than the statement defining the value, we take the frequency
     1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, "     Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked > 0)
	fbi->aa_walk_budget -= walked;
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (walked < 0)
	    fbi->aa_walk_budget = 0;
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, "     Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, "     Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the others; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, "     Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
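
/* A worked example of the scaling above (illustrative only, assuming
   REG_BR_PROB_BASE == 10000): if the block defining the argument runs
   with count 5 while the block containing the call runs with count 500,
   the argument is recomputed roughly once per 100 calls, so
   param_change_prob returns about (5/500) * 10000 == 100, i.e. a 1%
   change probability.  */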
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (ipa_func_body_info *fbi,
			      ipa_fn_summary *summary,
			      class ipa_node_params *params_summary,
			      basic_block bb,
			      ipa_predicate *p,
			      vec<ipa_predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;

  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (first_bb));
  if (!stmt
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
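
/* For intuition (an illustrative sketch, not part of the original code):
   the shape being matched is the (half) diamond

     first_bb:  if (a_1 > 10) goto then_bb; else goto bb;
     then_bb:   goto bb;
     bb:        x_2 = PHI <0 (first_bb), 1 (then_bb)>

   When a_1 derives from a tracked parameter, the predicate computed for
   the controlling condition also governs whether the PHI result becomes
   a known constant.  */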
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
			  ipa_predicate *p,
			  vec<ipa_predicate> nonconstant_names)
{
  unsigned i;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
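
/* Example (illustrative only): for x_3 = PHI <5 (bb1), y_2 (bb2)>, the
   constant argument 5 contributes nothing, while y_2 contributes
   nonconstant_names[version of y_2]; the resulting *P is the or_with of
   *P with that predicate, and it becomes the predicate recorded for
   x_3.  */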
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
	{
	  tree var = gimple_call_lhs (stmt);
	  tree arg = gimple_call_arg (stmt, 0);
	  use_operand_p use_p;
	  gimple *use_stmt;
	  bool match = false;
	  bool done = false;

	  if (!var || !arg)
	    continue;
	  gcc_assert (TREE_CODE (var) == SSA_NAME);

	  while (TREE_CODE (arg) == SSA_NAME)
	    {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
	      if (!is_gimple_assign (stmt_tmp))
		break;
	      switch (gimple_assign_rhs_code (stmt_tmp))
		{
		  case LT_EXPR:
		  case LE_EXPR:
		  case GT_EXPR:
		  case GE_EXPR:
		  case EQ_EXPR:
		  case NE_EXPR:
		    match = true;
		    done = true;
		    break;
		  CASE_CONVERT:
		    break;
		  default:
		    done = true;
		    break;
		}
	      if (done)
		break;
	      arg = gimple_assign_rhs1 (stmt_tmp);
	    }

	  if (match && single_imm_use (var, &use_p, &use_stmt)
	      && gimple_code (use_stmt) == GIMPLE_COND)
	    return stmt;
	}
    }
  return NULL;
}
/* Return true when the basic block contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
/* Return true if STMT computes a floating point expression that may be
   affected by -ffast-math and similar flags.  */

static bool
fp_expression_p (gimple *stmt)
{
  ssa_op_iter i;
  tree op;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
    if (FLOAT_TYPE_P (TREE_TYPE (op)))
      return true;
  return false;
}
/* Return true if T references memory location that is local
   for the function (that means, dead after return) or read-only.  */

bool
refs_local_or_readonly_memory_p (tree t)
{
  /* Non-escaping memory is fine.  */
  t = get_base_address (t);
  if ((TREE_CODE (t) == MEM_REF
      || TREE_CODE (t) == TARGET_MEM_REF))
    return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));

  /* Automatic variables are fine.  */
  if (DECL_P (t)
      && auto_var_in_fn_p (t, current_function_decl))
    return true;

  /* Read-only variables are fine.  */
  if (DECL_P (t) && TREE_READONLY (t))
    return true;

  return false;
}
/* Return true if T is a pointer pointing to memory location that is local
   for the function (that means, dead after return) or read-only.  */

bool
points_to_local_or_readonly_memory_p (tree t)
{
  /* See if memory location is clearly invalid.  */
  if (integer_zerop (t))
    return flag_delete_null_pointer_checks;
  if (TREE_CODE (t) == SSA_NAME)
    {
      /* For IPA passes we can consider accesses to the return slot local
	 even if it is not local in the sense that the memory is dead by
	 the end of the function.
	 The outer function will see a store in the call assignment
	 and thus this will do the right thing for all uses of this
	 function in the current IPA passes (modref, pure/const discovery
	 and inlining heuristics).  */
      if (DECL_RESULT (current_function_decl)
	  && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
	  && t == ssa_default_def (cfun, DECL_RESULT (current_function_decl)))
	return true;
      return !ptr_deref_may_alias_global_p (t, false);
    }
  if (TREE_CODE (t) == ADDR_EXPR)
    return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
  return false;
}
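
/* Examples (illustrative only): "&local_array[i]" is an ADDR_EXPR whose
   base is an automatic variable, so this returns true; a pointer loaded
   from a global fails the points-to test and yields false; literal 0 is
   considered "local" only when -fdelete-null-pointer-checks guarantees it
   can never be dereferenced.  */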
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
  /* Estimate static overhead for function prologue/epilogue and alignment.  */
  int size = opt_for_fn (node->decl, param_uninlined_function_insns);
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  ipa_node_params *params_summary
    = early ? NULL : ipa_node_params_sum->get (node);
  ipa_predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<ipa_predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  vec_free (info->conds);
  info->conds = NULL;
  info->size_time_table.release ();
  info->call_size_time_table.release ();

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = ipa_node_params_sum->get (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length (), true);
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->dump_name ());

  /* When we run into the maximal number of entries, we assign everything to
     the constant truth case.  Be sure to have it in the list.  */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  bb_predicate = ipa_predicate::not_inlined ();
  info->account_size_time (opt_for_fn (node->decl,
				param_uninlined_function_insns)
			   * ipa_fn_summary::size_scale,
			   opt_for_fn (node->decl,
				param_uninlined_function_time),
			   bb_predicate,
			   bb_predicate);

  /* Only look for target information for inlinable functions.  */
  bool scan_for_target_info =
    info->inlinable
    && targetm.target_option.need_ipa_fn_target_info (node->decl,
						      info->target_info);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = bb->count.to_sreal_scale (entry_count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(ipa_predicate *)bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      if (fbi.info && nonconstant_names.exists ())
	{
	  ipa_predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  ipa_predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     __builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  ipa_predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count, true);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		      es->param[i].points_to_local_or_readonly_memory
			= points_to_local_or_readonly_memory_p
			    (gimple_call_arg (stmt, i));
		    }
		}
	      /* We cannot setup VLA parameters during inlining.  */
	      for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
		if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
		  {
		    edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
		    break;
		  }
	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      if (edge->speculative)
		{
		  cgraph_edge *indirect
		    = edge->speculative_call_indirect_edge ();
		  ipa_call_summary *es2
		    = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);

		  /* Edge is the first direct call.
		     Create and duplicate call summaries for multiple
		     speculative call targets.  */
		  for (cgraph_edge *direct
			 = edge->next_speculative_call_target ();
		       direct;
		       direct = direct->next_speculative_call_target ())
		    {
		      ipa_call_summary *es3
			= ipa_call_summaries->get_create (direct);
		      ipa_call_summaries->duplicate (edge, direct,
						     es, es3);
		    }
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      ipa_predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  if (prob)
		    {
		      ipa_predicate ip
			= bb_predicate & ipa_predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}
	    }

	  /* For target specific information, we want to scan all statements
	     rather than just the statements with non-zero weights, to avoid
	     missing something interesting for target information, such as
	     internal function calls.  */
	  if (scan_for_target_info)
	    scan_for_target_info =
	      targetm.target_option.update_ipa_fn_target_info
		(info->target_info, stmt);

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    ipa_predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				(&fbi, info, params_summary,
				 TREE_OPERAND (op, 1),
				 nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }
	}
    }
  free (order);

  if (nonconstant_names.exists () && !early)
    {
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      unsigned max_loop_predicates = opt_for_fn (node->decl,
						 param_ipa_max_loop_predicates);

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      for (auto loop : loops_list (cfun, 0))
	{
	  ipa_predicate loop_iterations = true;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  if (!loop->header->aux)
	    continue;

	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);

	  bb_predicate = *(ipa_predicate *)loop->header->aux;
	  auto_vec<edge> exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	      {
		ipa_predicate will_be_nonconstant
		  = will_be_nonconstant_expr_predicate (&fbi, info,
							params_summary,
							niter_desc.niter,
							nonconstant_names);
		if (will_be_nonconstant != true)
		  will_be_nonconstant = bb_predicate & will_be_nonconstant;
		if (will_be_nonconstant != true
		    && will_be_nonconstant != false)
		  loop_iterations &= will_be_nonconstant;
	      }
	  add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
				      phdr_freq, max_loop_predicates);
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  ipa_predicate loop_stride = true;
	  basic_block *body = get_loop_body (loop);
	  profile_count phdr_count = loop_preheader_edge (loop)->count ();
	  sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      if (!body[i]->aux)
		continue;

	      bb_predicate = *(ipa_predicate *)body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  ipa_predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  add_freqcounting_predicate (&s->loop_strides, loop_stride,
				      phdr_freq, max_loop_predicates);
	  free (body);
	}
      scev_finalize ();
    }
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((ipa_predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((ipa_predicate *)e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
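
/* Accounting sketch (illustrative numbers added by the editor): with the
   elimination probability PROB in range <0,2>, a non-call statement of
   size 1 with PROB == 2 (always eliminated by inlining) lands entirely in
   the bucket predicated on "not inlined", so it disappears from estimates
   of inlined copies; with PROB == 1 the cost splits evenly between the
   "not inlined" bucket and the unconditional one, so inlined copies are
   estimated at half the offline cost.  The two bucket weights always sum
   to 2 == ipa_fn_summary::size_scale, and the time halves sum back to the
   full FINAL_TIME.  */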
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk)
    {
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      ipa_predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      t = ipa_predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant relies on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* During IPA profile merging we may be called w/o virtual SSA form
	 built.  */
      update_ssa (TODO_update_ssa_only_virtuals);

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      bool no_signature = false;
      /* Type attributes can use parameter indices to describe them.
	 Special case fn spec since we can safely preserve them in
	 modref summaries.  */
      for (tree list = TYPE_ATTRIBUTES (TREE_TYPE (node->decl));
	   list && !no_signature; list = TREE_CHAIN (list))
	if (!ipa_param_adjustments::type_attribute_allowed_p
	       (get_attribute_name (list)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "No signature change:"
		       " function type has unhandled attribute %s.\n",
		       IDENTIFIER_POINTER (get_attribute_name (list)));
	    no_signature = true;
	  }
      for (tree parm = DECL_ARGUMENTS (node->decl);
	   parm && !no_signature; parm = DECL_CHAIN (parm))
	if (variably_modified_type_p (TREE_TYPE (parm), node->decl))
	  {
	    if (dump_file)
	      fprintf (dump_file, "No signature change:"
		       " has parameter with variably modified type.\n");
	    no_signature = true;
	  }

      /* Likewise for #pragma omp declare simd functions or functions
	 with simd attribute.  */
      if (no_signature
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS,
					 BUILT_IN_VA_START))
		    break;
		}
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary except for the case when speculative
     edges are present, since these are accounted to size but not
     self_size.  Do not compare time, since with a different summation
     order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  if (flag_checking)
    {
      for (e = node->indirect_calls; e; e = e->next_callee)
	if (e->speculative)
	  break;
      gcc_assert (e || size_info->size == size_info->self_size);
    }
}
/* Compute parameters of functions used by inliner using
   current_function_decl.  */

static unsigned int
compute_fn_summary_for_current (void)
{
  compute_fn_summary (cgraph_node::get (current_function_decl), true);
  return 0;
}
/* Estimate benefit devirtualizing indirect edge IE and return true if it can
   be devirtualized and inlined, provided m_known_vals, m_known_contexts and
   m_known_aggs in AVALS.  Return false straight away if AVALS is NULL.  */

static bool
estimate_edge_devirt_benefit (struct cgraph_edge *ie,
			      int *size, int *time,
			      ipa_call_arg_values *avals)
{
  tree target;
  struct cgraph_node *callee;
  class ipa_fn_summary *isummary;
  enum availability avail;
  bool speculative;

  if (!avals
      || (!avals->m_known_vals.length () && !avals->m_known_contexts.length ()))
    return false;
  if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
    return false;

  target = ipa_get_indirect_edge_target (ie, avals, &speculative);
  if (!target || speculative)
    return false;

  /* Account for difference in cost between indirect and direct calls.  */
  *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
  *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
  gcc_checking_assert (*time >= 0);
  gcc_checking_assert (*size >= 0);

  callee = cgraph_node::get (target);
  if (!callee || !callee->definition)
    return false;
  callee = callee->function_symbol (&avail);
  if (avail < AVAIL_AVAILABLE)
    return false;
  isummary = ipa_fn_summaries->get (callee);
  if (isummary == NULL)
    return false;

  return isummary->inlinable;
}
/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
   handle edge E with probability PROB.  Set HINTS accordingly if edge may be
   devirtualized.  AVALS, if non-NULL, describes the context of the call site
   as far as values of parameters are concerned.  */

static inline void
estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
			     sreal *time, ipa_call_arg_values *avals,
			     ipa_hints *hints)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int call_size = es->call_stmt_size;
  int call_time = es->call_stmt_time;
  int cur_size;

  if (!e->callee && hints && e->maybe_hot_p ()
      && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
    *hints |= INLINE_HINT_indirect_call;
  cur_size = call_size * ipa_fn_summary::size_scale;
  *size += cur_size;
  if (min_size)
    *min_size += cur_size;
  if (time)
    *time += ((sreal)call_time) * e->sreal_frequency ();
}
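
/* Example of the scaling (illustrative only, assuming
   ipa_fn_summary::size_scale == 2): with call_stmt_size == 3 the edge
   contributes cur_size == 6 to the accumulated size; callers divide by
   size_scale (via RDIV) only after all contributions are summed, which
   keeps sub-unit costs representable in the integer totals.  */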
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS describe the context of the call
   site.

   Helper for estimate_calls_size_and_time which does the same but
   (in most cases) faster.  */

static void
estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
				int *min_size, sreal *time,
				ipa_hints *hints,
				clause_t possible_truths,
				ipa_call_arg_values *avals)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	{
	  gcc_checking_assert (!ipa_call_summaries->get (e));
	  estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
					  hints, possible_truths, avals);

	  continue;
	}
      class ipa_call_summary *es = ipa_call_summaries->get (e);

      /* Do not care about zero sized builtins.  */
      if (!es->call_stmt_size)
	{
	  gcc_checking_assert (!es->call_stmt_time);
	  continue;
	}
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	{
	  /* Predicates of calls shall not use NOT_CHANGED codes,
	     so we do not need to compute probabilities.  */
	  estimate_edge_size_and_time (e, size,
				       es->predicate ? NULL : min_size,
				       time, avals, hints);
	}
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      if (!es->predicate
	  || es->predicate->evaluate (possible_truths))
	estimate_edge_size_and_time (e, size,
				     es->predicate ? NULL : min_size,
				     time, avals, hints);
    }
}
/* Populate sum->call_size_time_table for edges from NODE.  */

static void
summarize_calls_size_and_time (struct cgraph_node *node,
			       ipa_fn_summary *sum)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	{
	  gcc_checking_assert (!ipa_call_summaries->get (e));
	  summarize_calls_size_and_time (e->callee, sum);
	  continue;
	}
      int size = 0;
      sreal time = 0;

      estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);

      ipa_predicate pred = true;
      class ipa_call_summary *es = ipa_call_summaries->get (e);

      if (es->predicate)
	pred = *es->predicate;
      sum->account_size_time (size, time, pred, pred, true);
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    {
      int size = 0;
      sreal time = 0;

      estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
      ipa_predicate pred = true;
      class ipa_call_summary *es = ipa_call_summaries->get (e);

      if (es->predicate)
	pred = *es->predicate;
      sum->account_size_time (size, time, pred, pred, true);
    }
}
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
   context of the call site.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      ipa_call_arg_values *avals)
{
  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
  bool use_table = true;

  gcc_assert (node->callees || node->indirect_calls);

  /* During early inlining we do not calculate info for very
     large functions and thus there is no need for producing
     summaries.  */
  if (!ipa_node_params_sum)
    use_table = false;
  /* Do not calculate summaries for simple wrappers; it is a waste
     of memory.  */
  else if (node->callees && node->indirect_calls
	   && node->callees->inline_failed && !node->callees->next_callee)
    use_table = false;
  /* If there is an indirect edge that may be optimized, we need
     to go the slow way.  */
  else if (avals && hints
	   && (avals->m_known_vals.length ()
	       || avals->m_known_contexts.length ()
	       || avals->m_known_aggs.length ()))
    {
      ipa_node_params *params_summary = ipa_node_params_sum->get (node);
      unsigned int nargs = params_summary
			   ? ipa_get_param_count (params_summary) : 0;

      for (unsigned int i = 0; i < nargs && use_table; i++)
	{
	  if (ipa_is_param_used_by_indirect_call (params_summary, i)
	      && (avals->safe_sval_at (i)
		  || (ipa_argagg_value_list (avals).value_for_index_p (i))))
	    use_table = false;
	  else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
		   && (avals->m_known_contexts.length () > i
		       && !avals->m_known_contexts[i].useless_p ()))
	    use_table = false;
	}
    }

  /* Fast path is via the call size time table.  */
  if (use_table)
    {
      /* Build summary if it is absent.  */
      if (!sum->call_size_time_table.length ())
	{
	  ipa_predicate true_pred = true;
	  sum->account_size_time (0, 0, true_pred, true_pred, true);
	  summarize_calls_size_and_time (node, sum);
	}

      int old_size = *size;
      sreal old_time = time ? *time : 0;

      if (min_size)
	*min_size += sum->call_size_time_table[0].size;

      unsigned int i;
      size_time_entry *e;

      /* Walk the table and account sizes and times.  */
      for (i = 0; sum->call_size_time_table.iterate (i, &e);
	   i++)
	if (e->exec_predicate.evaluate (possible_truths))
	  {
	    *size += e->size;
	    if (time)
	      *time += e->time;
	  }

      /* Be careful and see if both methods agree.  */
      if ((flag_checking || dump_file)
	  /* Do not try to sanity check when we know we lost some
	     size.  */
	  && sum->call_size_time_table.length ()
	     < ipa_fn_summary::max_size_time_table_size)
	{
	  estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time,
					  NULL, possible_truths, avals);
	  gcc_assert (*size == old_size);
	  if (time && (*time - old_time > 1 || *time - old_time < -1)
	      && dump_file)
	    fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
		     old_time.to_double (),
		     time->to_double ());
	}
    }
  /* Slow path by walking all edges.  */
  else
    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
				    possible_truths, avals);
}
/* Main constructor for ipa call context.  Memory allocation of ARG_VALUES
   is owned by the caller.  INLINE_PARAM_SUMMARY is also owned by the
   caller.  */

ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
				    clause_t nonspec_possible_truths,
				    vec<inline_param_summary>
				      inline_param_summary,
				    ipa_auto_call_arg_values *arg_values)
: m_node (node), m_possible_truths (possible_truths),
  m_nonspec_possible_truths (nonspec_possible_truths),
  m_inline_param_summary (inline_param_summary),
  m_avals (arg_values)
{
}
/* Set THIS to be a duplicate of CTX.  Copy all relevant info.  */

void
ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
{
  m_node = ctx.m_node;
  m_possible_truths = ctx.m_possible_truths;
  m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  m_inline_param_summary = vNULL;
  /* Copy the info only if there is at least one useful entry.  */
  if (ctx.m_inline_param_summary.exists ())
    {
      unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
	    && !ctx.m_inline_param_summary[i].useless_p ())
	  {
	    m_inline_param_summary
	      = ctx.m_inline_param_summary.copy ();
	    break;
	  }
    }
  m_avals.m_known_vals = vNULL;
  if (ctx.m_avals.m_known_vals.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && ctx.m_avals.m_known_vals[i])
	  {
	    m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
	    break;
	  }
    }

  m_avals.m_known_contexts = vNULL;
  if (ctx.m_avals.m_known_contexts.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
	    && !ctx.m_avals.m_known_contexts[i].useless_p ())
	  {
	    m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
	    break;
	  }
    }

  m_avals.m_known_aggs = vNULL;
  if (ctx.m_avals.m_known_aggs.exists ())
    {
      const ipa_argagg_value_list avl (&ctx.m_avals);
      for (unsigned int i = 0; i < nargs; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && avl.value_for_index_p (i))
	  {
	    m_avals.m_known_aggs = ctx.m_avals.m_known_aggs.copy ();
	    break;
	  }
    }

  m_avals.m_known_value_ranges = vNULL;
}
/* Release memory used by known_vals/contexts/aggs vectors and
   inline_param_summary.  */

void
ipa_cached_call_context::release ()
{
  /* See if the context was initialized in the first place.  */
  if (!m_node)
    return;
  m_avals.m_known_aggs.release ();
  m_avals.m_known_vals.release ();
  m_avals.m_known_contexts.release ();
  m_inline_param_summary.release ();
}
/* Return true if CTX describes the same call context as THIS.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
		(ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
	    {
	      if (i < ctx.m_avals.m_known_vals.length ()
		  && ctx.m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_vals.length ()
	      || !ctx.m_avals.m_known_vals[i])
	    {
	      if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
	    return false;
	}
    }
  if (m_avals.m_known_contexts.exists ()
      || ctx.m_avals.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_contexts.length ()
	      || m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_avals.m_known_contexts.length ()
		  && !ctx.m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_contexts.length ()
	      || ctx.m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_avals.m_known_contexts.length ()
		  && !m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_contexts[i].equal_to
		(ctx.m_avals.m_known_contexts[i]))
	    return false;
	}
    }
  if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
    {
      unsigned i = 0, j = 0;
      while (i < m_avals.m_known_aggs.length ()
	     || j < ctx.m_avals.m_known_aggs.length ())
	{
	  if (i >= m_avals.m_known_aggs.length ())
	    {
	      int idx2 = ctx.m_avals.m_known_aggs[j].index;
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
		return false;
	      j++;
	      continue;
	    }
	  if (j >= ctx.m_avals.m_known_aggs.length ())
	    {
	      int idx1 = m_avals.m_known_aggs[i].index;
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
		return false;
	      i++;
	      continue;
	    }

	  int idx1 = m_avals.m_known_aggs[i].index;
	  int idx2 = ctx.m_avals.m_known_aggs[j].index;
	  if (idx1 < idx2)
	    {
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
		return false;
	      i++;
	      continue;
	    }
	  if (idx1 > idx2)
	    {
	      if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
		return false;
	      j++;
	      continue;
	    }
	  if (!ipa_is_param_used_by_indirect_call (params_summary, idx1))
	    {
	      i++;
	      j++;
	      continue;
	    }

	  if ((m_avals.m_known_aggs[i].unit_offset
	       != ctx.m_avals.m_known_aggs[j].unit_offset)
	      || (m_avals.m_known_aggs[i].by_ref
		  != ctx.m_avals.m_known_aggs[j].by_ref)
	      || !operand_equal_p (m_avals.m_known_aggs[i].value,
				   ctx.m_avals.m_known_aggs[j].value))
	    return false;
	  i++;
	  j++;
	}
    }
  return true;
}
/* Fill in the selected fields in ESTIMATES with value estimated for call in
   this context.  Always compute size and min_size.  Only compute time and
   nonspecialized_time if EST_TIMES is true.  Only compute hints if EST_HINTS
   is true.  */

void
ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
					  bool est_times, bool est_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  sreal loops_with_known_iterations = 0;
  sreal loops_with_known_strides = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s\n"
	       "   Known to be false: ", m_node->dump_name ());

      for (i = ipa_predicate::not_inlined_condition;
	   i < (ipa_predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  if (m_node->callees || m_node->indirect_calls)
    estimate_calls_size_and_time (m_node, &size, &min_size,
				  est_times ? &time : NULL,
				  est_hints ? &hints : NULL, m_possible_truths,
				  &m_avals);

  sreal nonspecialized_time = time;

  min_size += info->size_time_table[0].size;
  for (i = 0; info->size_time_table.iterate (i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
	{
	  bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!est_times)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      if (nonconst)
		time += e->time;
	    }
	  else
	    {
	      int prob = e->nonconst_predicate.probability
			   (info->conds, m_possible_truths,
			    m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      if (prob == REG_BR_PROB_BASE)
		time += e->time;
	      else
		time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
	}
     }
  gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
  gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should always be bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (est_hints)
    {
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;
      if (info->builtin_constant_p_parms.length ()
	  && DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_builtin_constant_p;

      ipa_freqcounting_predicate *fcp;
      for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_iterations;
	    loops_with_known_iterations += fcp->freq;
	  }
      estimates->loops_with_known_iterations = loops_with_known_iterations;

      for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_stride;
	    loops_with_known_strides += fcp->freq;
	  }
      estimates->loops_with_known_strides = loops_with_known_strides;
    }

  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n   size:%i", (int) size);
      if (est_times)
	fprintf (dump_file, " time:%f nonspec time:%f",
		 time.to_double (), nonspecialized_time.to_double ());
      if (est_hints)
	fprintf (dump_file, " loops with known iterations:%f "
		 "known strides:%f", loops_with_known_iterations.to_double (),
		 loops_with_known_strides.to_double ());
      fprintf (dump_file, "\n");
    }
  if (est_times)
    {
      estimates->time = time;
      estimates->nonspecialized_time = nonspecialized_time;
    }
  estimates->size = size;
  estimates->min_size = min_size;
  if (est_hints)
    estimates->hints = hints;
}
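
/* Worked example of the probability scaling above (illustrative only,
   assuming REG_BR_PROB_BASE == 10000): a size/time entry with
   e->time == 40 whose nonconst predicate holds with probability 2500
   (25%) contributes 40 * 2500 / 10000 == 10 to TIME, while still
   contributing its full 40 to NONSPECIALIZED_TIME.  */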
/* Estimate size and time needed to execute callee of EDGE assuming that
   parameters known to be constant at caller of EDGE are propagated.
   KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
   and types for parameters.  */

void
estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
				   ipa_auto_call_arg_values *avals,
				   ipa_call_estimates *estimates)
{
  clause_t clause, nonspec_clause;

  evaluate_conditions_for_known_args (node, false, avals, &clause,
				      &nonspec_clause, NULL);
  ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
  ctx.estimate_size_and_time (estimates);
}
/* Return stack frame offset where frame of NODE is supposed to start inside
   of the function it is inlined to.
   Return 0 for functions that are not inlined.  */

HOST_WIDE_INT
ipa_get_stack_frame_offset (struct cgraph_node *node)
{
  HOST_WIDE_INT offset = 0;
  if (!node->inlined_to)
    return 0;
  node = node->callers->caller;
  while (true)
    {
      offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
      if (!node->inlined_to)
	return offset;
      node = node->callers->caller;
    }
}
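
/* Example (illustrative only): if A was inlined into B and B into C, with
   estimated_self_stack_size of 32 for B and 64 for C, the walk from A
   visits B and then C and returns 32 + 64 == 96: the offset at which A's
   frame is assumed to start within C's combined frame.  */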
/* Update summary information of inline clones after inlining.
   Compute peak stack usage.  */

static void
inline_update_callee_summaries (struct cgraph_node *node, int depth)
{
  struct cgraph_edge *e;

  ipa_propagate_frequency (node);
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
	inline_update_callee_summaries (e->callee, depth);
      else
	ipa_call_summaries->get (e)->loop_depth += depth;
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->loop_depth += depth;
}
/* Update change_prob and points_to_local_or_readonly_memory of EDGE after
   INLINED_EDGE has been inlined.

   When function A is inlined in B and A calls C with a parameter that
   changes with probability PROB1, and C is known to be a passthrough
   of an argument of B that changes with probability PROB2, the probability
   of change is now PROB1 * PROB2.  */

static void
remap_edge_params (struct cgraph_edge *inlined_edge,
		   struct cgraph_edge *edge)
{
  if (ipa_node_params_sum)
    {
      int i;
      ipa_edge_args *args = ipa_edge_args_sum->get (edge);
      if (!args)
	return;
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      class ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      if (es->param.length () == 0)
	return;

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  int prob = combine_probabilities (prob1, prob2);

		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;

		  if (inlined_es
			->param[id].points_to_local_or_readonly_memory)
		    es->param[i].points_to_local_or_readonly_memory = true;
		}
	      if (!es->param[i].points_to_local_or_readonly_memory
		  && jfunc->type == IPA_JF_CONST
		  && points_to_local_or_readonly_memory_p
			 (ipa_get_jf_constant (jfunc)))
		es->param[i].points_to_local_or_readonly_memory = true;
	    }
	}
    }
}
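
/* Worked example (illustrative only, assuming REG_BR_PROB_BASE == 10000):
   with PROB1 == 5000 and PROB2 == 5000, combine_probabilities yields 2500,
   i.e. a 50% * 50% == 25% chance that the forwarded argument changed; the
   "prob1 && prob2 && !prob" guard above keeps a tiny nonzero product from
   rounding down to 0, since 0 is reserved for compile time invariants.  */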
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      const vec<int> &operand_map,
		      const vec<HOST_WIDE_INT> &offset_map,
		      clause_t possible_truths,
		      ipa_predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  for (e = node->callees; e; e = next)
    {
      ipa_predicate p;
      next = e->next_callee;

      if (e->inline_failed)
	{
	  class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_params (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				   (info, params_summary,
				    callee_info, operand_map,
				    offset_map, possible_truths,
				    *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      ipa_predicate p;
      next = e->next_callee;

      remap_edge_params (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
			       (info, params_summary,
				callee_info, operand_map, offset_map,
				possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
/* Run remap_after_inlining on each predicate in V.  */

static void
remap_freqcounting_predicate (class ipa_fn_summary *info,
			      class ipa_node_params *params_summary,
			      class ipa_fn_summary *callee_info,
			      vec<ipa_freqcounting_predicate, va_gc> *v,
			      const vec<int> &operand_map,
			      const vec<HOST_WIDE_INT> &offset_map,
			      clause_t possible_truths,
			      ipa_predicate *toplev_predicate)

{
  ipa_freqcounting_predicate *fcp;
  for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
    {
      ipa_predicate p
	= fcp->predicate->remap_after_inlining (info, params_summary,
						callee_info, operand_map,
						offset_map, possible_truths,
						*toplev_predicate);
      if (p != false && p != true)
	*fcp->predicate &= p;
    }
}

/* We inlined EDGE.  Update summary of the function we inlined into.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<HOST_WIDE_INT, 8> offset_map;
  int i;
  ipa_predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  ipa_node_params *params_summary = (ipa_node_params_sum
				     ? ipa_node_params_sum->get (to) : NULL);

  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;
  info->target_info |= callee_info->target_info;

  if (callee_info->conds)
    {
      ipa_auto_call_arg_values avals;
      evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
    }
  if (ipa_node_params_sum && callee_info->conds)
    {
      /* Build a mapping from the callee's formals to the caller's:
	 operand_map[i] is the caller's formal passed unchanged in
	 position i (or -1), offset_map[i] carries the ancestor offset
	 (or -1 when aggregate contents are not preserved).  */
      ipa_edge_args *args = ipa_edge_args_sum->get (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count, true);
	  offset_map.safe_grow_cleared (count, true);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}

      int ip;
      for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
	if (ip < count && operand_map[ip] >= 0)
	  add_builtin_constant_p_parm (info, operand_map[ip]);
    }
  sreal freq = edge->sreal_frequency ();
  for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
    {
      ipa_predicate p;
      p = e->exec_predicate.remap_after_inlining
			      (info, params_summary,
			       callee_info, operand_map,
			       offset_map, clause,
			       toplev_predicate);
      ipa_predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
			      (info, params_summary,
			       callee_info, operand_map,
			       offset_map, clause,
			       toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, NULL);
	  if (prob != REG_BR_PROB_BASE)
	    add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  remap_edge_summaries (edge, edge->callee, info, params_summary,
			callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_iterations, operand_map,
				offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_strides, operand_map,
				offset_map, clause, &toplev_predicate);

  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);
  if (info->call_size_time_table.length ())
    {
      int edge_size = 0;
      sreal edge_time = 0;

      estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
      /* Unaccount size and time of the optimized out call.  */
      info->account_size_time (-edge_size, -edge_time,
			       es->predicate ? *es->predicate : true,
			       es->predicate ? *es->predicate : true,
			       true);
      /* Account new calls.  */
      summarize_calls_size_and_time (edge->callee, info);
    }

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}

/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
   overall size and time.  Recompute it.
   If RESET is true also recompute call_size_time_table.  */

void
ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get (node);
  size_time_entry *e;
  int i;

  size_info->size = 0;
  info->time = 0;
  for (i = 0; info->size_time_table.iterate (i, &e); i++)
    {
      size_info->size += e->size;
      info->time += e->time;
    }
  info->min_size = info->size_time_table[0].size;
  if (reset)
    info->call_size_time_table.release ();
  if (node->callees || node->indirect_calls)
    estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
				  &info->time, NULL,
				  ~(clause_t) (1 << ipa_predicate::false_condition),
				  NULL);
  size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
  info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
}
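
/* Sizes are accumulated in units of ipa_fn_summary::size_scale, so an
   entry accounting for roughly two statements contributes
   2 * size_scale to the scaled total; the final RDIV calls round the
   totals back down to plain size units.  */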

/* This function performs intraprocedural analysis in NODE that is required to
   inline indirect calls.  */

static void
inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
{
  ipa_analyze_node (node);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      ipa_print_node_params (dump_file, node);
      ipa_print_node_jump_functions (dump_file, node);
    }
}

/* Note function body size.  */

void
inline_analyze_function (struct cgraph_node *node)
{
  push_cfun (DECL_STRUCT_FUNCTION (node->decl));

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
  if (opt_for_fn (node->decl, optimize) && !node->thunk)
    inline_indirect_intraprocedural_analysis (node);
  compute_fn_summary (node, false);
  if (!optimize)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      for (e = node->indirect_calls; e; e = e->next_callee)
	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
    }

  pop_cfun ();
}

/* Called when new function is inserted to callgraph late.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}

/* Note function body sizes of all functions in the callgraph.  */

static void
ipa_fn_summary_generate (void)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_STRUCT_FUNCTION (node->decl))
      node->versionable = tree_versionable_function_p (node->decl);

  ipa_fn_summary_alloc ();

  ipa_fn_summaries->enable_insertion_hook ();

  ipa_register_cgraph_hooks ();

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias
	&& (flag_generate_lto || flag_generate_offload || flag_wpa
	    || opt_for_fn (node->decl, optimize)))
      inline_analyze_function (node);
}
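
/* In a non-LTO compilation this is where all function summaries get
   computed; with LTO the summaries computed here are streamed out by
   ipa_fn_summary_write and read back at WPA time by ipa_fn_summary_read
   below, while the insertion hook keeps summaries available for
   functions added to the callgraph later.  */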

/* Read inline summary for edge E from IB.  If PREVAILS is false, the edge
   belongs to a non-prevailing node; the data are then only consumed from
   the stream, not stored.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  class ipa_call_summary *es = prevails
				? ipa_call_summaries->get_create (e) : NULL;
  ipa_predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  if (length && es
      && (e->possibly_call_in_translation_unit_p ()
	  /* Also stream in jump functions to builtins in hope that they
	     will get fnspecs.  */
	  || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
    {
      es->param.safe_grow_cleared (length, true);
      for (i = 0; i < length; i++)
	{
	  es->param[i].change_prob = streamer_read_uhwi (ib);
	  es->param[i].points_to_local_or_readonly_memory
	    = streamer_read_uhwi (ib);
	}
    }
  else
    {
      for (i = 0; i < length; i++)
	{
	  streamer_read_uhwi (ib);
	  streamer_read_uhwi (ib);
	}
    }
}
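
/* The record read above is, positionally: three uhwis (call statement
   size, time and loop depth), one bitpack, the edge predicate, and a
   uhwi-counted vector of per-parameter (change probability,
   points-to-local-or-readonly-memory) pairs; it must stay in sync with
   write_ipa_call_summary below.  */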

/* Stream in inline summaries from the section.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      ipa_predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p ()
		       ? ipa_node_params_sum->get (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	  if (!lto_stream_offload_p)
	    info->target_info = streamer_read_uhwi (&ib);
	}
      else
	{
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	  if (!lto_stream_offload_p)
	    streamer_read_uhwi (&ib);
	}

      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
	vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
	      (params_summary, c.operand_num, true);
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
		c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table.length ());
      if (info && count2)
	info->size_time_table.reserve_exact (count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table.quick_push (e);
	}

      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_iterations, fcp);
	    }
	}
      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_strides, fcp);
	    }
	}
      count2 = streamer_read_uhwi (&ib);
      if (info && count2)
	info->builtin_constant_p_parms.reserve_exact (count2);
      for (j = 0; j < count2; j++)
	{
	  int parm = streamer_read_uhwi (&ib);
	  if (info)
	    info->builtin_constant_p_parms.quick_push (parm);
	}
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
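
/* Each per-node record in the section thus consists of the encoder index,
   stack size, self size and time, a flag bitpack, the condition vector,
   the size/time table, the loop iteration and loop stride hint vectors,
   the builtin_constant_p parameter list, and one call summary per
   outgoing (direct and indirect) edge, in exactly the order produced by
   ipa_fn_summary_write.  */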

/* Read inline summary.  Jump functions are shared among ipa-cp
   and inliner, so when ipa-cp is active, we don't need to read them
   twice.  */

static void
ipa_fn_summary_read (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  ipa_prop_read_jump_functions ();
  ipa_fn_summary_alloc ();

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data
	= lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
					&len);
      if (data)
	inline_read_section (file_data, data, len);
      else
	/* Fatal error here.  We do not want to support compiling ltrans units
	   with different version of compiler or different flags than the WPA
	   unit, so this should never happen.  */
	fatal_error (input_location,
		     "ipa inline summary is missing in input file");
    }
  ipa_register_cgraph_hooks ();

  gcc_assert (ipa_fn_summaries);
  ipa_fn_summaries->enable_insertion_hook ();
}

/* Write inline summary for edge E to OB.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);

  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
  streamer_write_bitpack (&bp);

  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    {
      streamer_write_uhwi (ob, es->param[i].change_prob);
      streamer_write_uhwi (ob,
			   es->param[i].points_to_local_or_readonly_memory);
    }
}
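
/* Keep the writer above in sync with read_ipa_call_summary; the stream is
   positional and carries no redundancy that would let the reader detect a
   mismatch.  */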

/* Write inline summaries of all functions in the current partition.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.  */

static void
ipa_fn_summary_write (void)
{
  struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
  lto_symtab_encoder_iterator lsei;
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;

  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *cnode = lsei_cgraph_node (lsei);
      if (cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *cnode = lsei_cgraph_node (lsei);
      if (cnode->definition && !cnode->alias)
	{
	  class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
	  class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  streamer_write_hwi (ob, size_info->estimated_self_stack_size);
	  streamer_write_hwi (ob, size_info->self_size);
	  info->time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  if (!lto_stream_offload_p)
	    streamer_write_uhwi (ob, info->target_info);
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      int j;
	      struct expr_eval_op *op;

	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->type, true);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	      streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
		{
		  streamer_write_uhwi (ob, op->code);
		  stream_write_tree (ob, op->type, true);
		  if (op->val[0])
		    {
		      bp = bitpack_create (ob->main_stream);
		      bp_pack_value (&bp, op->index, 2);
		      streamer_write_bitpack (&bp);
		      stream_write_tree (ob, op->val[0], true);
		      if (op->val[1])
			stream_write_tree (ob, op->val[1], true);
		    }
		}
	    }
	  streamer_write_uhwi (ob, info->size_time_table.length ());
	  for (i = 0; info->size_time_table.iterate (i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
	  ipa_freqcounting_predicate *fcp;
	  streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
	  for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	    {
	      fcp->predicate->stream_out (ob);
	      fcp->freq.stream_out (ob);
	    }
	  streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
	  for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	    {
	      fcp->predicate->stream_out (ob);
	      fcp->freq.stream_out (ob);
	    }
	  streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
	  int ip;
	  for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
	       i++)
	    streamer_write_uhwi (ob, ip);
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  ipa_prop_write_jump_functions ();
}
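
/* The jump functions written above by ipa_prop_write_jump_functions are
   the ones read back by ipa_prop_read_jump_functions at the start of
   ipa_fn_summary_read, so the two sections always travel together.  */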

/* Release function summary.  */

void
ipa_free_fn_summary (void)
{
  if (!ipa_call_summaries)
    return;
  ggc_delete (ipa_fn_summaries);
  ipa_fn_summaries = NULL;
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
  /* During IPA this is one of largest datastructures to release.  */
  if (flag_wpa)
    ggc_trim ();
}

/* Release function size summary.  */

void
ipa_free_size_summary (void)
{
  if (!ipa_size_summaries)
    return;
  delete ipa_size_summaries;
  ipa_size_summaries = NULL;
}
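
/* The size summaries are released separately from the function summaries
   because they are cheap and may be kept around longer; see the small_p
   variant of the free-fnsummary pass below.  */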

namespace {

const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override
  {
    return new pass_local_fn_summary (m_ctxt);
  }
  unsigned int execute (function *) final override
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
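
/* Factory functions like the one above hand the instance to the pass
   manager, which may duplicate it through the clone method, so the same
   pass can be scheduled more than once in the pipeline.  */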

/* Free inline summary.  */

namespace {

const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () final override
  {
    return new pass_ipa_free_fn_summary (m_ctxt);
  }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  bool gate (function *) final override { return true; }
  unsigned int execute (function *) final override
    {
      ipa_free_fn_summary ();
      /* Free ipa-prop structures if they are no longer needed.  */
      ipa_free_all_structures_after_iinln ();
      if (!small_p)
	ipa_free_size_summary ();
      return 0;
    }

private:
  /* If true, free only the function summaries and keep the cheaper size
     summaries for later passes.  */
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}

namespace {

const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}

/* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_cc_finalize (void)
{
  ipa_free_fn_summary ();
}