1 /* Function summary pass.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
64 #include "tree-streamer.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
86 #include "tree-into-ssa.h"
87 #include "symtab-clones.h"
88 #include "gimple-range.h"
/* Global summary vectors attached to call-graph nodes and edges,
   indexed by symbol / edge UID ("fast" summaries index by UID
   directly).  ipa_fn_summaries is GC-allocated (va_gc; created via
   ipa_fn_summary_t::create_ggc below), while the size and call
   summaries live on the heap (va_heap, plain new).  */
92 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
93 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
94 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
96 /* Edge predicates go here.  Pool-allocated so individual predicates
   can be returned with edge_predicate_pool.remove when they become
   trivially true/false.  */
97 static object_allocator
<ipa_predicate
> edge_predicate_pool ("edge predicates");
100 /* Dump IPA hints. */
102 ipa_dump_hints (FILE *f
, ipa_hints hints
)
106 fprintf (f
, "IPA hints:");
107 if (hints
& INLINE_HINT_indirect_call
)
109 hints
&= ~INLINE_HINT_indirect_call
;
110 fprintf (f
, " indirect_call");
112 if (hints
& INLINE_HINT_loop_iterations
)
114 hints
&= ~INLINE_HINT_loop_iterations
;
115 fprintf (f
, " loop_iterations");
117 if (hints
& INLINE_HINT_loop_stride
)
119 hints
&= ~INLINE_HINT_loop_stride
;
120 fprintf (f
, " loop_stride");
122 if (hints
& INLINE_HINT_same_scc
)
124 hints
&= ~INLINE_HINT_same_scc
;
125 fprintf (f
, " same_scc");
127 if (hints
& INLINE_HINT_in_scc
)
129 hints
&= ~INLINE_HINT_in_scc
;
130 fprintf (f
, " in_scc");
132 if (hints
& INLINE_HINT_cross_module
)
134 hints
&= ~INLINE_HINT_cross_module
;
135 fprintf (f
, " cross_module");
137 if (hints
& INLINE_HINT_declared_inline
)
139 hints
&= ~INLINE_HINT_declared_inline
;
140 fprintf (f
, " declared_inline");
142 if (hints
& INLINE_HINT_known_hot
)
144 hints
&= ~INLINE_HINT_known_hot
;
145 fprintf (f
, " known_hot");
147 if (hints
& INLINE_HINT_builtin_constant_p
)
149 hints
&= ~INLINE_HINT_builtin_constant_p
;
150 fprintf (f
, " builtin_constant_p");
156 /* Record SIZE and TIME to SUMMARY.
157 The accounted code will be executed when EXEC_PRED is true.
158 When NONCONST_PRED is false the code will evaluate to constant and
159 will get optimized out in specialized clones of the function.
160 If CALL is true account to call_size_time_table rather than
164 ipa_fn_summary::account_size_time (int size
, sreal time
,
165 const ipa_predicate
&exec_pred
,
166 const ipa_predicate
&nonconst_pred_in
,
172 ipa_predicate nonconst_pred
;
173 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
175 if (exec_pred
== false)
178 nonconst_pred
= nonconst_pred_in
& exec_pred
;
180 if (nonconst_pred
== false)
183 /* We need to create initial empty unconditional clause, but otherwise
184 we don't need to account empty times and sizes. */
185 if (!size
&& time
== 0 && table
->length ())
188 /* Only for calls we are unaccounting what we previously recorded. */
189 gcc_checking_assert (time
>= 0 || call
);
191 for (i
= 0; table
->iterate (i
, &e
); i
++)
192 if (e
->exec_predicate
== exec_pred
193 && e
->nonconst_predicate
== nonconst_pred
)
198 if (i
== max_size_time_table_size
)
203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
205 "\t\tReached limit on number of entries, "
206 "ignoring the predicate.");
208 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
211 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
212 ((double) size
) / ipa_fn_summary::size_scale
,
213 (time
.to_double ()), found
? "" : "new ");
214 exec_pred
.dump (dump_file
, conds
, 0);
215 if (exec_pred
!= nonconst_pred
)
217 fprintf (dump_file
, " nonconst:");
218 nonconst_pred
.dump (dump_file
, conds
);
221 fprintf (dump_file
, "\n");
225 class size_time_entry new_entry
;
226 new_entry
.size
= size
;
227 new_entry
.time
= time
;
228 new_entry
.exec_predicate
= exec_pred
;
229 new_entry
.nonconst_predicate
= nonconst_pred
;
231 call_size_time_table
.safe_push (new_entry
);
233 size_time_table
.safe_push (new_entry
);
239 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
240 /* Tolerate small roundoff issues. */
246 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
248 static struct cgraph_edge
*
249 redirect_to_unreachable (struct cgraph_edge
*e
)
251 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
252 struct cgraph_node
*target
= cgraph_node::get_create
253 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
256 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
258 e
= cgraph_edge::make_direct (e
, target
);
260 e
->redirect_callee (target
);
261 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
262 e
->inline_failed
= CIF_UNREACHABLE
;
263 e
->count
= profile_count::zero ();
264 es
->call_stmt_size
= 0;
265 es
->call_stmt_time
= 0;
267 callee
->remove_symbol_and_inline_clones ();
271 /* Set predicate for edge E. */
274 edge_set_predicate (struct cgraph_edge
*e
, ipa_predicate
*predicate
)
276 /* If the edge is determined to be never executed, redirect it
277 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
279 if (predicate
&& *predicate
== false
280 /* When handling speculative edges, we need to do the redirection
281 just once. Do it always on the direct edge, so we do not
282 attempt to resolve speculation while duplicating the edge. */
283 && (!e
->speculative
|| e
->callee
))
284 e
= redirect_to_unreachable (e
);
286 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
287 if (predicate
&& *predicate
!= true)
290 es
->predicate
= edge_predicate_pool
.allocate ();
291 *es
->predicate
= *predicate
;
296 edge_predicate_pool
.remove (es
->predicate
);
297 es
->predicate
= NULL
;
301 /* Set predicate for hint *P. */
304 set_hint_predicate (ipa_predicate
**p
, ipa_predicate new_predicate
)
306 if (new_predicate
== false || new_predicate
== true)
309 edge_predicate_pool
.remove (*p
);
315 *p
= edge_predicate_pool
.allocate ();
320 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
321 Otherwise add a new item to the vector with this predicate and freq equal
322 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
323 in which case the function does nothing. */
326 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
327 const ipa_predicate
&new_predicate
, sreal add_freq
,
328 unsigned max_num_predicates
)
330 if (new_predicate
== false || new_predicate
== true)
332 ipa_freqcounting_predicate
*f
;
333 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
334 if (new_predicate
== f
->predicate
)
339 if (vec_safe_length (*v
) >= max_num_predicates
)
340 /* Too many different predicates to account for. */
343 ipa_freqcounting_predicate fcp
;
344 fcp
.predicate
= NULL
;
345 set_hint_predicate (&fcp
.predicate
, new_predicate
);
347 vec_safe_push (*v
, fcp
);
351 /* Compute what conditions may or may not hold given information about
352 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
353 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
354 copy when called in a given context. It is a bitmask of conditions. Bit
355 0 means that condition is known to be false, while bit 1 means that condition
356 may or may not be true. These differ - for example NOT_INLINED condition
357 is always false in the second and also builtin_constant_p tests cannot use
358 the fact that parameter is indeed a constant.
360 When INLINE_P is true, assume that we are inlining. AVAL contains known
361 information about argument values. The function does not modify its content
362 and so AVALs could also be of type ipa_call_arg_values but so far all
363 callers work with the auto version and so we avoid the conversion for
366 ERROR_MARK value of an argument means compile time invariant. */
369 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
371 ipa_auto_call_arg_values
*avals
,
372 clause_t
*ret_clause
,
373 clause_t
*ret_nonspec_clause
)
375 clause_t clause
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
376 clause_t nonspec_clause
= 1 << ipa_predicate::not_inlined_condition
;
377 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
381 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
386 struct expr_eval_op
*op
;
388 /* We allow call stmt to have fewer arguments than the callee function
389 (especially for K&R style programs). So bound check here (we assume
390 m_known_aggs vector is either empty or has the same length as
392 gcc_checking_assert (!avals
->m_known_aggs
.length ()
393 || !avals
->m_known_vals
.length ()
394 || (avals
->m_known_vals
.length ()
395 == avals
->m_known_aggs
.length ()));
399 if (c
->code
== ipa_predicate::changed
401 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
404 if (ipa_agg_value_set
*agg
= avals
->safe_aggval_at (c
->operand_num
))
406 tree sval
= avals
->safe_sval_at (c
->operand_num
);
407 val
= ipa_find_agg_cst_for_param (agg
, sval
, c
->offset
,
415 val
= avals
->safe_sval_at (c
->operand_num
);
416 if (val
&& val
== error_mark_node
417 && c
->code
!= ipa_predicate::changed
)
422 && (c
->code
== ipa_predicate::changed
423 || c
->code
== ipa_predicate::is_not_constant
))
425 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
426 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
429 if (c
->code
== ipa_predicate::changed
)
431 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
435 if (c
->code
== ipa_predicate::is_not_constant
)
437 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
441 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
443 if (c
->type
!= TREE_TYPE (val
))
444 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
445 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
450 val
= fold_unary (op
->code
, op
->type
, val
);
451 else if (!op
->val
[1])
452 val
= fold_binary (op
->code
, op
->type
,
453 op
->index
? op
->val
[0] : val
,
454 op
->index
? val
: op
->val
[0]);
455 else if (op
->index
== 0)
456 val
= fold_ternary (op
->code
, op
->type
,
457 val
, op
->val
[0], op
->val
[1]);
458 else if (op
->index
== 1)
459 val
= fold_ternary (op
->code
, op
->type
,
460 op
->val
[0], val
, op
->val
[1]);
461 else if (op
->index
== 2)
462 val
= fold_ternary (op
->code
, op
->type
,
463 op
->val
[0], op
->val
[1], val
);
469 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
472 if (res
&& integer_zerop (res
))
474 if (res
&& integer_onep (res
))
476 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
478 |= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
482 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
484 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
486 value_range vr
= avals
->m_known_value_ranges
[c
->operand_num
];
487 if (!vr
.undefined_p ()
489 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
491 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
494 range_fold_unary_expr (&res
, NOP_EXPR
,
495 c
->type
, &vr
, vr
.type ());
500 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
502 if (vr
.varying_p () || vr
.undefined_p ())
507 range_fold_unary_expr (&res
, op
->code
, op
->type
, &vr
, type
);
508 else if (!op
->val
[1])
510 value_range
op0 (op
->val
[0], op
->val
[0]);
511 range_fold_binary_expr (&res
, op
->code
, op
->type
,
512 op
->index
? &op0
: &vr
,
513 op
->index
? &vr
: &op0
);
520 if (!vr
.varying_p () && !vr
.undefined_p ())
523 value_range
val_vr (c
->val
, c
->val
);
524 range_fold_binary_expr (&res
, c
->code
, boolean_type_node
,
533 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
534 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
536 *ret_clause
= clause
;
537 if (ret_nonspec_clause
)
538 *ret_nonspec_clause
= nonspec_clause
;
541 /* Return true if VRP will be executed on the function.
542 We do not want to anticipate optimizations that will not happen.
544 FIXME: This can be confused with -fdisable and debug counters and thus
545 it should not be used for correctness (only to make heuristics work).
546 This means that inliner should do its own optimizations of expressions
547 that it predicts to be constant so wrong code can not be triggered by
548 builtin_constant_p. */
551 vrp_will_run_p (struct cgraph_node
*node
)
/* opt_for_fn evaluates the flags against NODE's own (possibly
   per-function "optimize" attribute) settings rather than the global
   command line, so this is a per-node answer.  */
553 return (opt_for_fn (node
->decl
, optimize
)
554 && !opt_for_fn (node
->decl
, optimize_debug
)
555 && opt_for_fn (node
->decl
, flag_tree_vrp
));
558 /* Similarly about FRE. */
561 fre_will_run_p (struct cgraph_node
*node
)
/* Same per-node test as vrp_will_run_p, but keyed on -ftree-fre.  */
563 return (opt_for_fn (node
->decl
, optimize
)
564 && !opt_for_fn (node
->decl
, optimize_debug
)
565 && opt_for_fn (node
->decl
, flag_tree_fre
));
568 /* Work out what conditions might be true at invocation of E.
569 Compute costs for inlined edge if INLINE_P is true.
571 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
572 (if non-NULL) conditions evaluated for nonspecialized clone called
575 Vectors in AVALS will be populated with useful known information about
576 argument values - information not known to have any uses will be omitted -
577 except for m_known_contexts which will only be calculated if
578 COMPUTE_CONTEXTS is true. */
581 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
582 clause_t
*clause_ptr
,
583 clause_t
*nonspec_clause_ptr
,
584 ipa_auto_call_arg_values
*avals
,
585 bool compute_contexts
)
587 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
588 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
589 class ipa_edge_args
*args
;
592 *clause_ptr
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
594 if (ipa_node_params_sum
595 && !e
->call_stmt_cannot_inline_p
596 && (info
->conds
|| compute_contexts
)
597 && (args
= ipa_edge_args_sum
->get (e
)) != NULL
)
599 struct cgraph_node
*caller
;
600 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
601 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
602 int i
, count
= ipa_get_cs_argument_count (args
);
606 if (e
->caller
->inlined_to
)
607 caller
= e
->caller
->inlined_to
;
610 caller_parms_info
= ipa_node_params_sum
->get (caller
);
611 callee_pi
= ipa_node_params_sum
->get (callee
);
613 /* Watch for thunks. */
615 /* Watch for variadic functions. */
616 count
= MIN (count
, ipa_get_param_count (callee_pi
));
620 for (i
= 0; i
< count
; i
++)
622 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
624 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
625 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
627 /* Determine if we know constant value of the parameter. */
628 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
629 ipa_get_type (callee_pi
, i
));
631 if (!cst
&& e
->call_stmt
632 && i
< (int)gimple_call_num_args (e
->call_stmt
))
634 cst
= gimple_call_arg (e
->call_stmt
, i
);
635 if (!is_gimple_min_invariant (cst
))
640 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
641 if (!avals
->m_known_vals
.length ())
642 avals
->m_known_vals
.safe_grow_cleared (count
, true);
643 avals
->m_known_vals
[i
] = cst
;
645 else if (inline_p
&& !es
->param
[i
].change_prob
)
647 if (!avals
->m_known_vals
.length ())
648 avals
->m_known_vals
.safe_grow_cleared (count
, true);
649 avals
->m_known_vals
[i
] = error_mark_node
;
652 /* If we failed to get simple constant, try value range. */
653 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
654 && vrp_will_run_p (caller
)
655 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
658 = ipa_value_range_from_jfunc (caller_parms_info
, e
, jf
,
659 ipa_get_type (callee_pi
,
661 if (!vr
.undefined_p () && !vr
.varying_p ())
663 if (!avals
->m_known_value_ranges
.length ())
665 avals
->m_known_value_ranges
.safe_grow (count
, true);
666 for (int i
= 0; i
< count
; ++i
)
667 new (&avals
->m_known_value_ranges
[i
])
670 avals
->m_known_value_ranges
[i
] = vr
;
674 /* Determine known aggregate values. */
675 if (fre_will_run_p (caller
))
677 ipa_agg_value_set agg
678 = ipa_agg_value_set_from_jfunc (caller_parms_info
,
680 if (agg
.items
.length ())
682 if (!avals
->m_known_aggs
.length ())
683 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
684 avals
->m_known_aggs
[i
] = agg
;
689 /* For calls used in polymorphic calls we further determine
690 polymorphic call context. */
692 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
694 ipa_polymorphic_call_context
695 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
696 if (!ctx
.useless_p ())
698 if (!avals
->m_known_contexts
.length ())
699 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
700 avals
->m_known_contexts
[i
]
701 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
706 gcc_assert (!count
|| callee
->thunk
);
708 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
710 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
712 for (i
= 0; i
< count
; i
++)
714 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
715 if (!is_gimple_min_invariant (cst
))
719 if (!avals
->m_known_vals
.length ())
720 avals
->m_known_vals
.safe_grow_cleared (count
, true);
721 avals
->m_known_vals
[i
] = cst
;
726 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
731 /* Allocate the function summary. */
734 ipa_fn_summary_alloc (void)
/* Must only be called once per compilation; ipa_fn_summaries being
   non-NULL would mean the summaries already exist.  */
736 gcc_checking_assert (!ipa_fn_summaries
);
/* Size and call summaries are plain heap objects ...  */
737 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
/* ... while the function summary itself is created in GC memory.  */
738 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
739 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
742 ipa_call_summary::~ipa_call_summary ()
745 edge_predicate_pool
.remove (predicate
);
750 ipa_fn_summary::~ipa_fn_summary ()
752 unsigned len
= vec_safe_length (loop_iterations
);
753 for (unsigned i
= 0; i
< len
; i
++)
754 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
755 len
= vec_safe_length (loop_strides
);
756 for (unsigned i
= 0; i
< len
; i
++)
757 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
759 call_size_time_table
.release ();
760 vec_free (loop_iterations
);
761 vec_free (loop_strides
);
762 builtin_constant_p_parms
.release ();
766 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
769 for (e
= node
->callees
; e
; e
= e
->next_callee
)
770 ipa_call_summaries
->remove (e
);
771 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
772 ipa_call_summaries
->remove (e
);
775 /* Duplicate predicates in loop hint vector, allocating memory for them and
776 remove and deallocate any uninteresting (true or false) ones. Return the
779 static vec
<ipa_freqcounting_predicate
, va_gc
> *
780 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
781 clause_t possible_truths
)
783 if (vec_safe_length (v
) == 0)
786 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
787 int len
= res
->length();
788 for (int i
= len
- 1; i
>= 0; i
--)
790 ipa_predicate new_predicate
791 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
792 /* We do not want to free previous predicate; it is used by node
794 (*res
)[i
].predicate
= NULL
;
795 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
797 if (!(*res
)[i
].predicate
)
798 res
->unordered_remove (i
);
805 /* Hook that is called by cgraph.c when a node is duplicated. */
807 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
809 ipa_fn_summary
*src_info
,
810 ipa_fn_summary
*info
)
812 new (info
) ipa_fn_summary (*src_info
);
813 /* TODO: as an optimization, we may avoid copying conditions
814 that are known to be false or true. */
815 info
->conds
= vec_safe_copy (info
->conds
);
817 clone_info
*cinfo
= clone_info::get (dst
);
818 /* When there are any replacements in the function body, see if we can figure
819 out that something was optimized out. */
820 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
822 /* Use SRC parm info since it may not be copied yet. */
823 ipa_node_params
*parms_info
= ipa_node_params_sum
->get (src
);
824 ipa_auto_call_arg_values avals
;
825 int count
= ipa_get_param_count (parms_info
);
827 clause_t possible_truths
;
828 ipa_predicate true_pred
= true;
830 int optimized_out_size
= 0;
831 bool inlined_to_p
= false;
832 struct cgraph_edge
*edge
, *next
;
834 info
->size_time_table
.release ();
835 avals
.m_known_vals
.safe_grow_cleared (count
, true);
836 for (i
= 0; i
< count
; i
++)
838 struct ipa_replace_map
*r
;
840 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
842 if (r
->parm_num
== i
)
844 avals
.m_known_vals
[i
] = r
->new_tree
;
849 evaluate_conditions_for_known_args (dst
, false,
852 /* We are going to specialize,
853 so ignore nonspec truths. */
856 info
->account_size_time (0, 0, true_pred
, true_pred
);
858 /* Remap size_time vectors.
859 Simplify the predicate by pruning out alternatives that are known
861 TODO: as an optimization, we can also eliminate conditions known
863 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
865 ipa_predicate new_exec_pred
;
866 ipa_predicate new_nonconst_pred
;
867 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
869 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
871 if (new_exec_pred
== false || new_nonconst_pred
== false)
872 optimized_out_size
+= e
->size
;
874 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
878 /* Remap edge predicates with the same simplification as above.
879 Also copy constantness arrays. */
880 for (edge
= dst
->callees
; edge
; edge
= next
)
882 ipa_predicate new_predicate
;
883 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
884 next
= edge
->next_callee
;
886 if (!edge
->inline_failed
)
890 new_predicate
= es
->predicate
->remap_after_duplication
892 if (new_predicate
== false && *es
->predicate
!= false)
893 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
894 edge_set_predicate (edge
, &new_predicate
);
897 /* Remap indirect edge predicates with the same simplification as above.
898 Also copy constantness arrays. */
899 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
901 ipa_predicate new_predicate
;
902 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
903 next
= edge
->next_callee
;
905 gcc_checking_assert (edge
->inline_failed
);
908 new_predicate
= es
->predicate
->remap_after_duplication
910 if (new_predicate
== false && *es
->predicate
!= false)
912 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
913 edge_set_predicate (edge
, &new_predicate
);
915 info
->loop_iterations
916 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
919 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
921 if (info
->builtin_constant_p_parms
.length())
923 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
925 info
->builtin_constant_p_parms
= vNULL
;
926 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
927 if (!avals
.m_known_vals
[ip
])
928 info
->builtin_constant_p_parms
.safe_push (ip
);
931 /* If inliner or someone after inliner will ever start producing
932 non-trivial clones, we will get trouble with lack of information
933 about updating self sizes, because size vectors already contains
934 sizes of the callees. */
935 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
939 info
->size_time_table
= src_info
->size_time_table
.copy ();
940 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
941 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
943 info
->builtin_constant_p_parms
944 = info
->builtin_constant_p_parms
.copy ();
946 ipa_freqcounting_predicate
*f
;
947 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
949 ipa_predicate p
= *f
->predicate
;
951 set_hint_predicate (&f
->predicate
, p
);
953 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
955 ipa_predicate p
= *f
->predicate
;
957 set_hint_predicate (&f
->predicate
, p
);
960 if (!dst
->inlined_to
)
961 ipa_update_overall_fn_summary (dst
);
965 /* Hook that is called by cgraph.c when a node is duplicated. */
968 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
969 struct cgraph_edge
*dst
,
970 class ipa_call_summary
*srcinfo
,
971 class ipa_call_summary
*info
)
973 new (info
) ipa_call_summary (*srcinfo
);
974 info
->predicate
= NULL
;
975 edge_set_predicate (dst
, srcinfo
->predicate
);
976 info
->param
= srcinfo
->param
.copy ();
977 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
979 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
980 - eni_size_weights
.call_cost
);
981 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
982 - eni_time_weights
.call_cost
);
986 /* Dump edge summaries associated to NODE and recursively to all clones.
990 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
991 class ipa_fn_summary
*info
)
993 struct cgraph_edge
*edge
;
994 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
996 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
997 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
1001 "%*s%s %s\n%*s freq:%4.2f",
1002 indent
, "", callee
->dump_name (),
1003 !edge
->inline_failed
1004 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1005 indent
, "", edge
->sreal_frequency ().to_double ());
1007 if (cross_module_call_p (edge
))
1008 fprintf (f
, " cross module");
1011 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1012 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1014 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1015 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1017 fprintf (f
, " callee size:%2i stack:%2i",
1018 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1019 (int) s
->estimated_stack_size
);
1021 if (es
&& es
->predicate
)
1023 fprintf (f
, " predicate: ");
1024 es
->predicate
->dump (f
, info
->conds
);
1028 if (es
&& es
->param
.exists ())
1029 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1031 int prob
= es
->param
[i
].change_prob
;
1034 fprintf (f
, "%*s op%i is compile time invariant\n",
1036 else if (prob
!= REG_BR_PROB_BASE
)
1037 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1038 prob
* 100.0 / REG_BR_PROB_BASE
);
1039 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1040 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1043 if (!edge
->inline_failed
)
1045 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1046 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1048 (int) ipa_get_stack_frame_offset (callee
),
1049 (int) ss
->estimated_self_stack_size
);
1050 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1053 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1055 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1056 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1060 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1061 es
->call_stmt_time
);
1064 fprintf (f
, "predicate: ");
1065 es
->predicate
->dump (f
, info
->conds
);
1074 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1076 if (node
->definition
)
1078 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1079 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1084 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1085 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1086 fprintf (f
, " always_inline");
1088 fprintf (f
, " inlinable");
1089 if (s
->fp_expressions
)
1090 fprintf (f
, " fp_expression");
1091 if (s
->builtin_constant_p_parms
.length ())
1093 fprintf (f
, " builtin_constant_p_parms");
1094 for (unsigned int i
= 0;
1095 i
< s
->builtin_constant_p_parms
.length (); i
++)
1096 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1098 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1099 fprintf (f
, " self size: %i\n", ss
->self_size
);
1100 fprintf (f
, " global size: %i\n", ss
->size
);
1101 fprintf (f
, " min size: %i\n", s
->min_size
);
1102 fprintf (f
, " self stack: %i\n",
1103 (int) ss
->estimated_self_stack_size
);
1104 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1106 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1108 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1109 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1111 fprintf (f
, " size:%f, time:%f",
1112 (double) e
->size
/ ipa_fn_summary::size_scale
,
1113 e
->time
.to_double ());
1114 if (e
->exec_predicate
!= true)
1116 fprintf (f
, ", executed if:");
1117 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1119 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1121 fprintf (f
, ", nonconst if:");
1122 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1126 ipa_freqcounting_predicate
*fcp
;
1127 bool first_fcp
= true;
1128 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1132 fprintf (f
, " loop iterations:");
1135 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1136 fcp
->predicate
->dump (f
, s
->conds
);
1139 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1143 fprintf (f
, " loop strides:");
1146 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1147 fcp
->predicate
->dump (f
, s
->conds
);
1149 fprintf (f
, " calls:\n");
1150 dump_ipa_call_summary (f
, 4, node
, s
);
1154 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
/* Debugger convenience wrapper: dump NODE's function summary to
   stderr.  */
1159 ipa_debug_fn_summary (struct cgraph_node
*node
)
1161 ipa_dump_fn_summary (stderr
, node
);
1165 ipa_dump_fn_summaries (FILE *f
)
1167 struct cgraph_node
*node
;
1169 FOR_EACH_DEFINED_FUNCTION (node
)
1170 if (!node
->inlined_to
)
1171 ipa_dump_fn_summary (f
, node
);
1174 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1175 boolean variable pointed to by DATA. */
1178 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1181 bool *b
= (bool *) data
;
1186 /* If OP refers to value of function parameter, return the corresponding
1187 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1188 PARM_DECL) will be stored to *SIZE_P in that case too. */
1191 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1194 /* SSA_NAME referring to parm default def? */
1195 if (TREE_CODE (op
) == SSA_NAME
1196 && SSA_NAME_IS_DEFAULT_DEF (op
)
1197 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1200 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1201 return SSA_NAME_VAR (op
);
1203 /* Non-SSA parm reference? */
1204 if (TREE_CODE (op
) == PARM_DECL
1205 && fbi
->aa_walk_budget
> 0)
1207 bool modified
= false;
1210 ao_ref_init (&refd
, op
);
1211 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1212 mark_modified
, &modified
, NULL
, NULL
,
1213 fbi
->aa_walk_budget
);
1216 fbi
->aa_walk_budget
= 0;
1219 fbi
->aa_walk_budget
-= walked
;
1223 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1230 /* If OP refers to value of function parameter, return the corresponding
1231 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1232 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1233 stored to *SIZE_P in that case too. */
1236 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1239 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1243 if (TREE_CODE (op
) == SSA_NAME
1244 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1245 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1246 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1247 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1252 /* If OP refers to a value of a function parameter or value loaded from an
1253 aggregate passed to a parameter (either by value or reference), return TRUE
1254 and store the number of the parameter to *INDEX_P, the access size into
1255 *SIZE_P, and information whether and how it has been loaded from an
1256 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1257 statement in which OP is used or loaded. */
1260 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1261 gimple
*stmt
, tree op
, int *index_p
,
1263 struct agg_position_info
*aggpos
)
1265 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1267 gcc_checking_assert (aggpos
);
1270 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1273 aggpos
->agg_contents
= false;
1274 aggpos
->by_ref
= false;
1278 if (TREE_CODE (op
) == SSA_NAME
)
1280 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1281 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1283 stmt
= SSA_NAME_DEF_STMT (op
);
1284 op
= gimple_assign_rhs1 (stmt
);
1285 if (!REFERENCE_CLASS_P (op
))
1286 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1290 aggpos
->agg_contents
= true;
1291 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1292 stmt
, op
, index_p
, &aggpos
->offset
,
1293 size_p
, &aggpos
->by_ref
);
1296 /* See if statement might disappear after inlining.
1297 0 - means not eliminated
1298 1 - half of statements goes away
1299 2 - for sure it is eliminated.
1300 We are not terribly sophisticated, basically looking for simple abstraction
1301 penalty wrappers. */
1304 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1306 enum gimple_code code
= gimple_code (stmt
);
1307 enum tree_code rhs_code
;
1317 if (gimple_num_ops (stmt
) != 2)
1320 rhs_code
= gimple_assign_rhs_code (stmt
);
1322 /* Casts of parameters, loads from parameters passed by reference
1323 and stores to return value or parameters are often free after
1324 inlining due to SRA and further combining.
1325 Assume that half of statements goes away. */
1326 if (CONVERT_EXPR_CODE_P (rhs_code
)
1327 || rhs_code
== VIEW_CONVERT_EXPR
1328 || rhs_code
== ADDR_EXPR
1329 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1331 tree rhs
= gimple_assign_rhs1 (stmt
);
1332 tree lhs
= gimple_assign_lhs (stmt
);
1333 tree inner_rhs
= get_base_address (rhs
);
1334 tree inner_lhs
= get_base_address (lhs
);
1335 bool rhs_free
= false;
1336 bool lhs_free
= false;
1343 /* Reads of parameter are expected to be free. */
1344 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1346 /* Match expressions of form &this->field. Those will most likely
1347 combine with something upstream after inlining. */
1348 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1350 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1351 if (TREE_CODE (op
) == PARM_DECL
)
1353 else if (TREE_CODE (op
) == MEM_REF
1354 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1359 /* When parameter is not SSA register because its address is taken
1360 and it is just copied into one, the statement will be completely
1361 free after inlining (we will copy propagate backward). */
1362 if (rhs_free
&& is_gimple_reg (lhs
))
1365 /* Reads of parameters passed by reference
1366 expected to be free (i.e. optimized out after inlining). */
1367 if (TREE_CODE (inner_rhs
) == MEM_REF
1368 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1371 /* Copying parameter passed by reference into gimple register is
1372 probably also going to copy propagate, but we can't be quite
1374 if (rhs_free
&& is_gimple_reg (lhs
))
1377 /* Writes to parameters, parameters passed by value and return value
1378 (either directly or passed via invisible reference) are free.
1380 TODO: We ought to handle testcase like
1381 struct a {int a,b;};
1389 This translate into:
1404 For that we either need to copy ipa-split logic detecting writes
1406 if (TREE_CODE (inner_lhs
) == PARM_DECL
1407 || TREE_CODE (inner_lhs
) == RESULT_DECL
1408 || (TREE_CODE (inner_lhs
) == MEM_REF
1409 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1411 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1412 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1413 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1415 0))) == RESULT_DECL
))))
1418 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1420 if (lhs_free
&& rhs_free
)
1429 /* Analyze EXPR if it represents a series of simple operations performed on
1430 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1431 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1432 Type of the parameter or load from an aggregate via the parameter is
1433 stored in *TYPE_P. Operations on the parameter are recorded to
1434 PARAM_OPS_P if it is not NULL. */
1437 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1438 gimple
*stmt
, tree expr
,
1439 int *index_p
, tree
*type_p
,
1440 struct agg_position_info
*aggpos
,
1441 expr_eval_ops
*param_ops_p
= NULL
)
1443 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1447 *param_ops_p
= NULL
;
1451 expr_eval_op eval_op
;
1453 unsigned cst_count
= 0;
1455 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1458 tree type
= TREE_TYPE (expr
);
1460 if (aggpos
->agg_contents
)
1462 /* Stop if containing bit-field. */
1463 if (TREE_CODE (expr
) == BIT_FIELD_REF
1464 || contains_bitfld_component_ref_p (expr
))
1472 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1475 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1478 switch (gimple_assign_rhs_class (stmt
))
1480 case GIMPLE_SINGLE_RHS
:
1481 expr
= gimple_assign_rhs1 (stmt
);
1484 case GIMPLE_UNARY_RHS
:
1488 case GIMPLE_BINARY_RHS
:
1492 case GIMPLE_TERNARY_RHS
:
1500 /* Stop if expression is too complex. */
1501 if (op_count
++ == op_limit
)
1506 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1507 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1508 eval_op
.val
[0] = NULL_TREE
;
1509 eval_op
.val
[1] = NULL_TREE
;
1513 for (unsigned i
= 0; i
< rhs_count
; i
++)
1515 tree op
= gimple_op (stmt
, i
+ 1);
1517 gcc_assert (op
&& !TYPE_P (op
));
1518 if (is_gimple_ip_invariant (op
))
1520 if (++cst_count
== rhs_count
)
1523 eval_op
.val
[cst_count
- 1] = op
;
1527 /* Found a non-constant operand, and record its index in rhs
1534 /* Found more than one non-constant operands. */
1540 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1543 /* Failed to decompose, free resource and return. */
1546 vec_free (*param_ops_p
);
1551 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1554 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1558 /* Avoid duplicates. */
1559 for (unsigned int i
= 0;
1560 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1563 summary
->builtin_constant_p_parms
.safe_push (parm
);
1566 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1567 predicates to the CFG edges. */
1570 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1571 class ipa_fn_summary
*summary
,
1572 class ipa_node_params
*params_summary
,
1578 struct agg_position_info aggpos
;
1579 enum tree_code code
, inverted_code
;
1584 expr_eval_ops param_ops
;
1586 last
= last_stmt (bb
);
1587 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1589 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1591 op
= gimple_cond_lhs (last
);
1593 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1596 code
= gimple_cond_code (last
);
1597 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1599 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1601 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1602 ? code
: inverted_code
);
1603 /* invert_tree_comparison will return ERROR_MARK on FP
1604 comparisons that are not EQ/NE instead of returning proper
1605 unordered one. Be sure it is not confused with NON_CONSTANT.
1607 And if the edge's target is the final block of diamond CFG graph
1608 of this conditional statement, we do not need to compute
1609 predicate for the edge because the final block's predicate must
1610 be at least as that of the first block of the statement. */
1611 if (this_code
!= ERROR_MARK
1612 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1615 = add_condition (summary
, params_summary
, index
,
1616 param_type
, &aggpos
,
1617 this_code
, gimple_cond_rhs (last
), param_ops
);
1618 e
->aux
= edge_predicate_pool
.allocate ();
1619 *(ipa_predicate
*) e
->aux
= p
;
1622 vec_free (param_ops
);
1625 if (TREE_CODE (op
) != SSA_NAME
)
1628 if (builtin_constant_p (op))
1632 Here we can predicate nonconstant_code. We can't
1633 really handle constant_code since we have no predicate
1634 for this and also the constant code is not known to be
1635 optimized away when inliner doesn't see operand is constant.
1636 Other optimizers might think otherwise. */
1637 if (gimple_cond_code (last
) != NE_EXPR
1638 || !integer_zerop (gimple_cond_rhs (last
)))
1640 set_stmt
= SSA_NAME_DEF_STMT (op
);
1641 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1642 || gimple_call_num_args (set_stmt
) != 1)
1644 op2
= gimple_call_arg (set_stmt
, 0);
1645 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1648 add_builtin_constant_p_parm (summary
, index
);
1649 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1651 ipa_predicate p
= add_condition (summary
, params_summary
, index
,
1652 param_type
, &aggpos
,
1653 ipa_predicate::is_not_constant
, NULL_TREE
);
1654 e
->aux
= edge_predicate_pool
.allocate ();
1655 *(ipa_predicate
*) e
->aux
= p
;
1660 /* If BB ends by a switch we can turn into predicates, attach corresponding
1661 predicates to the CFG edges. */
1664 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1665 class ipa_fn_summary
*summary
,
1666 class ipa_node_params
*params_summary
,
1672 struct agg_position_info aggpos
;
1678 expr_eval_ops param_ops
;
1680 lastg
= last_stmt (bb
);
1681 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1683 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1684 op
= gimple_switch_index (last
);
1685 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1689 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1690 tree type
= TREE_TYPE (op
);
1691 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1692 param_ipa_max_switch_predicate_bounds
);
1693 int bound_count
= 0;
1696 get_range_query (cfun
)->range_of_expr (vr
, op
);
1697 if (vr
.undefined_p ())
1698 vr
.set_varying (TREE_TYPE (op
));
1699 value_range_kind vr_type
= vr
.kind ();
1700 wide_int vr_wmin
= wi::to_wide (vr
.min ());
1701 wide_int vr_wmax
= wi::to_wide (vr
.max ());
1703 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1705 e
->aux
= edge_predicate_pool
.allocate ();
1706 *(ipa_predicate
*) e
->aux
= false;
1709 e
= gimple_switch_edge (cfun
, last
, 0);
1710 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1711 default case if its target basic block is in convergence point of all
1712 switch cases, which can be determined by checking whether it
1713 post-dominates the switch statement. */
1714 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1715 bound_count
= INT_MAX
;
1717 n
= gimple_switch_num_labels (last
);
1718 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1720 tree cl
= gimple_switch_label (last
, case_idx
);
1721 tree min
= CASE_LOW (cl
);
1722 tree max
= CASE_HIGH (cl
);
1725 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1727 /* The case value might not have same type as switch expression,
1728 extend the value based on the expression type. */
1729 if (TREE_TYPE (min
) != type
)
1730 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1734 else if (TREE_TYPE (max
) != type
)
1735 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1737 /* The case's target basic block is in convergence point of all switch
1738 cases, its predicate should be at least as that of the switch
1740 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1742 else if (min
== max
)
1743 p
= add_condition (summary
, params_summary
, index
, param_type
,
1744 &aggpos
, EQ_EXPR
, min
, param_ops
);
1747 ipa_predicate p1
, p2
;
1748 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1749 &aggpos
, GE_EXPR
, min
, param_ops
);
1750 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1751 &aggpos
, LE_EXPR
, max
, param_ops
);
1754 *(ipa_predicate
*) e
->aux
1755 = p
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1757 /* If there are too many disjoint case ranges, predicate for default
1758 case might become too complicated. So add a limit here. */
1759 if (bound_count
> bound_limit
)
1762 bool new_range
= true;
1764 if (!ranges
.is_empty ())
1766 wide_int curr_wmin
= wi::to_wide (min
);
1767 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1769 /* Merge case ranges if they are continuous. */
1770 if (curr_wmin
== last_wmax
+ 1)
1772 else if (vr_type
== VR_ANTI_RANGE
)
1774 /* If two disjoint case ranges can be connected by anti-range
1775 of switch index, combine them to one range. */
1776 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1777 vr_type
= VR_UNDEFINED
;
1778 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1783 /* Create/extend a case range. And we count endpoints of range set,
1784 this number nearly equals to number of conditions that we will create
1785 for predicate of default case. */
1788 bound_count
+= (min
== max
) ? 1 : 2;
1789 ranges
.safe_push (std::make_pair (min
, max
));
1793 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1794 ranges
.last ().second
= max
;
1798 e
= gimple_switch_edge (cfun
, last
, 0);
1799 if (bound_count
> bound_limit
)
1801 *(ipa_predicate
*) e
->aux
= true;
1802 vec_free (param_ops
);
1806 ipa_predicate p_seg
= true;
1807 ipa_predicate p_all
= false;
1809 if (vr_type
!= VR_RANGE
)
1811 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1812 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1815 /* Construct predicate to represent default range set that is negation of
1816 all case ranges. Case range is classified as containing single/non-single
1817 values. Suppose a piece of case ranges in the following.
1819 [D1...D2] [S1] ... [Sn] [D3...D4]
1821 To represent default case's range sets between two non-single value
1822 case ranges (From D2 to D3), we construct predicate as:
1824 D2 < x < D3 && x != S1 && ... && x != Sn
1826 for (size_t i
= 0; i
< ranges
.length (); i
++)
1828 tree min
= ranges
[i
].first
;
1829 tree max
= ranges
[i
].second
;
1832 p_seg
&= add_condition (summary
, params_summary
, index
,
1833 param_type
, &aggpos
, NE_EXPR
,
1837 /* Do not create sub-predicate for range that is beyond low bound
1839 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1841 p_seg
&= add_condition (summary
, params_summary
, index
,
1842 param_type
, &aggpos
,
1843 LT_EXPR
, min
, param_ops
);
1844 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1847 /* Do not create sub-predicate for range that is beyond up bound
1849 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1855 p_seg
= add_condition (summary
, params_summary
, index
,
1856 param_type
, &aggpos
, GT_EXPR
,
1861 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1862 *(ipa_predicate
*) e
->aux
1863 = p_all
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1865 vec_free (param_ops
);
1869 /* For each BB in NODE attach to its AUX pointer predicate under
1870 which it is executable. */
1873 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1874 struct cgraph_node
*node
,
1875 class ipa_fn_summary
*summary
,
1876 class ipa_node_params
*params_summary
)
1878 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1882 FOR_EACH_BB_FN (bb
, my_function
)
1884 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1885 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1888 /* Entry block is always executable. */
1889 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1890 = edge_predicate_pool
.allocate ();
1891 *(ipa_predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1893 /* A simple dataflow propagation of predicates forward in the CFG.
1894 TODO: work in reverse postorder. */
1898 FOR_EACH_BB_FN (bb
, my_function
)
1900 ipa_predicate p
= false;
1903 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1907 ipa_predicate this_bb_predicate
1908 = *(ipa_predicate
*) e
->src
->aux
;
1910 this_bb_predicate
&= (*(ipa_predicate
*) e
->aux
);
1911 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1918 basic_block pdom_bb
;
1923 bb
->aux
= edge_predicate_pool
.allocate ();
1924 *((ipa_predicate
*) bb
->aux
) = p
;
1926 else if (p
!= *(ipa_predicate
*) bb
->aux
)
1928 /* This OR operation is needed to ensure monotonous data flow
1929 in the case we hit the limit on number of clauses and the
1930 and/or operations above give approximate answers. */
1931 p
= p
.or_with (summary
->conds
, *(ipa_predicate
*)bb
->aux
);
1932 if (p
!= *(ipa_predicate
*)bb
->aux
)
1935 *((ipa_predicate
*)bb
->aux
) = p
;
1939 /* For switch/if statement, we can OR-combine predicates of all
1940 its cases/branches to get predicate for basic block in their
1941 convergence point, but sometimes this will generate very
1942 complicated predicate. Actually, we can get simplified
1943 predicate in another way by using the fact that predicate
1944 for a basic block must also hold true for its post dominators.
1945 To be specific, basic block in convergence point of
1946 conditional statement should include predicate of the
1948 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1949 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1951 else if (!pdom_bb
->aux
)
1954 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1955 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
1957 else if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
1959 p
= p
.or_with (summary
->conds
,
1960 *(ipa_predicate
*)pdom_bb
->aux
);
1961 if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
1964 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
1973 /* Return predicate specifying when the STMT might have result that is not
1974 a compile time constant. */
1976 static ipa_predicate
1977 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1978 class ipa_fn_summary
*summary
,
1979 class ipa_node_params
*params_summary
,
1981 vec
<ipa_predicate
> nonconstant_names
)
1986 while (UNARY_CLASS_P (expr
))
1987 expr
= TREE_OPERAND (expr
, 0);
1989 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1990 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1991 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
1992 ipa_predicate::changed
, NULL_TREE
);
1993 if (is_gimple_min_invariant (expr
))
1995 if (TREE_CODE (expr
) == SSA_NAME
)
1996 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1997 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
2000 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2002 TREE_OPERAND (expr
, 0),
2008 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2010 TREE_OPERAND (expr
, 1),
2012 return p1
.or_with (summary
->conds
, p2
);
2014 else if (TREE_CODE (expr
) == COND_EXPR
)
2017 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2019 TREE_OPERAND (expr
, 0),
2025 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2027 TREE_OPERAND (expr
, 1),
2031 p1
= p1
.or_with (summary
->conds
, p2
);
2032 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2034 TREE_OPERAND (expr
, 2),
2036 return p2
.or_with (summary
->conds
, p1
);
2038 else if (TREE_CODE (expr
) == CALL_EXPR
)
2049 /* Return predicate specifying when the STMT might have result that is not
2050 a compile time constant. */
2052 static ipa_predicate
2053 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2054 class ipa_fn_summary
*summary
,
2055 class ipa_node_params
*params_summary
,
2057 vec
<ipa_predicate
> nonconstant_names
)
2059 ipa_predicate p
= true;
2062 tree param_type
= NULL_TREE
;
2063 ipa_predicate op_non_const
;
2066 struct agg_position_info aggpos
;
2068 /* What statements might be optimized away
2069 when their arguments are constant. */
2070 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2071 && gimple_code (stmt
) != GIMPLE_COND
2072 && gimple_code (stmt
) != GIMPLE_SWITCH
2073 && (gimple_code (stmt
) != GIMPLE_CALL
2074 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2077 /* Stores will stay anyway. */
2078 if (gimple_store_p (stmt
))
2081 is_load
= gimple_assign_load_p (stmt
);
2083 /* Loads can be optimized when the value is known. */
2086 tree op
= gimple_assign_rhs1 (stmt
);
2087 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2094 /* See if we understand all operands before we start
2095 adding conditionals. */
2096 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2098 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2099 /* For arguments we can build a condition. */
2100 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2102 if (TREE_CODE (use
) != SSA_NAME
)
2104 /* If we know when operand is constant,
2105 we still can say something useful. */
2106 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2113 add_condition (summary
, params_summary
,
2114 base_index
, param_type
, &aggpos
,
2115 ipa_predicate::changed
, NULL_TREE
);
2117 op_non_const
= false;
2118 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2120 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2123 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2125 if (index
!= base_index
)
2126 p
= add_condition (summary
, params_summary
, index
,
2127 TREE_TYPE (parm
), NULL
,
2128 ipa_predicate::changed
, NULL_TREE
);
2133 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2134 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2136 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2137 && gimple_op (stmt
, 0)
2138 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2139 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2141 return op_non_const
;
2144 struct record_modified_bb_info
2151 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2152 probability how often it changes between USE_BB.
2153 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2154 is in different loop nest, we can do better.
2155 This is all just estimate. In theory we look for minimal cut separating
2156 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2160 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2162 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2163 if (l
&& l
->header
->count
< init_bb
->count
)
2168 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2169 set except for info->stmt. */
2172 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2174 struct record_modified_bb_info
*info
=
2175 (struct record_modified_bb_info
*) data
;
2176 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2178 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2180 bitmap_set_bit (info
->bb_set
,
2181 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2182 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2184 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2185 gimple_bb (info
->stmt
))->index
);
2188 fprintf (dump_file
, " Param ");
2189 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2190 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2191 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2193 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2194 gimple_bb (info
->stmt
))->index
);
2195 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2200 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2201 will change since last invocation of STMT.
2203 Value 0 is reserved for compile time invariants.
2204 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2205 ought to be REG_BR_PROB_BASE / estimated_iters. */
2208 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2210 tree op
= gimple_call_arg (stmt
, i
);
2211 basic_block bb
= gimple_bb (stmt
);
2213 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2214 op
= TREE_OPERAND (op
, 0);
2216 tree base
= get_base_address (op
);
2218 /* Global invariants never change. */
2219 if (is_gimple_min_invariant (base
))
2222 /* We would have to do non-trivial analysis to really work out what
2223 is the probability of value to change (i.e. when init statement
2224 is in a sibling loop of the call).
2226 We do an conservative estimate: when call is executed N times more often
2227 than the statement defining value, we take the frequency 1/N. */
2228 if (TREE_CODE (base
) == SSA_NAME
)
2230 profile_count init_count
;
2232 if (!bb
->count
.nonzero_p ())
2233 return REG_BR_PROB_BASE
;
2235 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2236 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2238 init_count
= get_minimal_bb
2239 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2240 gimple_bb (stmt
))->count
;
2242 if (init_count
< bb
->count
)
2243 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2244 * REG_BR_PROB_BASE
).to_int (), 1);
2245 return REG_BR_PROB_BASE
;
2250 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2251 struct record_modified_bb_info info
;
2252 tree init
= ctor_for_folding (base
);
2254 if (init
!= error_mark_node
)
2256 if (!bb
->count
.nonzero_p () || fbi
->aa_walk_budget
== 0)
2257 return REG_BR_PROB_BASE
;
2260 fprintf (dump_file
, " Analyzing param change probability of ");
2261 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2262 fprintf (dump_file
, "\n");
2264 ao_ref_init (&refd
, op
);
2267 info
.bb_set
= BITMAP_ALLOC (NULL
);
2269 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2270 NULL
, NULL
, fbi
->aa_walk_budget
);
2272 fbi
->aa_walk_budget
-= walked
;
2273 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2276 fbi
->aa_walk_budget
= 0;
2280 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2282 fprintf (dump_file
, " Set in same BB as used.\n");
2284 BITMAP_FREE (info
.bb_set
);
2285 return REG_BR_PROB_BASE
;
2290 /* Lookup the most frequent update of the value and believe that
2291 it dominates all the other; precise analysis here is difficult. */
2292 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2293 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2296 fprintf (dump_file
, " Set with count ");
2297 max
.dump (dump_file
);
2298 fprintf (dump_file
, " and used with count ");
2299 bb
->count
.dump (dump_file
);
2300 fprintf (dump_file
, " freq %f\n",
2301 max
.to_sreal_scale (bb
->count
).to_double ());
2304 BITMAP_FREE (info
.bb_set
);
2305 if (max
< bb
->count
)
2306 return MAX ((max
.to_sreal_scale (bb
->count
)
2307 * REG_BR_PROB_BASE
).to_int (), 1);
2308 return REG_BR_PROB_BASE
;
2312 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2313 sub-graph and if the predicate the condition depends on is known. If so,
2314 return true and store the pointer the predicate in *P. */
2317 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2318 ipa_fn_summary
*summary
,
2319 class ipa_node_params
*params_summary
,
2322 vec
<ipa_predicate
> nonconstant_names
)
2326 basic_block first_bb
= NULL
;
2329 if (single_pred_p (bb
))
2335 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2337 if (single_succ_p (e
->src
))
2339 if (!single_pred_p (e
->src
))
2342 first_bb
= single_pred (e
->src
);
2343 else if (single_pred (e
->src
) != first_bb
)
2350 else if (e
->src
!= first_bb
)
2358 stmt
= last_stmt (first_bb
);
2360 || gimple_code (stmt
) != GIMPLE_COND
2361 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2364 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2365 gimple_cond_lhs (stmt
),
2373 /* Given a PHI statement in a function described by inline properties SUMMARY
2374 and *P being the predicate describing whether the selected PHI argument is
2375 known, store a predicate for the result of the PHI statement into
2376 NONCONSTANT_NAMES, if possible. */
2379 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2381 vec
<ipa_predicate
> nonconstant_names
)
2385 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2387 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2388 if (!is_gimple_min_invariant (arg
))
2390 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2391 *p
= p
->or_with (summary
->conds
,
2392 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2400 fprintf (dump_file
, "\t\tphi predicate: ");
2401 p
->dump (dump_file
, summary
->conds
);
2403 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2406 /* For a typical usage of __builtin_expect (a<b, 1), we
2407 may introduce an extra relation stmt:
2408 With the builtin, we have
2411 t3 = __builtin_expect (t2, 1);
2414 Without the builtin, we have
2417 This affects the size/time estimation and may have
2418 an impact on the earlier inlining.
2419 Here find this pattern and fix it up later. */
2422 find_foldable_builtin_expect (basic_block bb
)
2424 gimple_stmt_iterator bsi
;
2426 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2428 gimple
*stmt
= gsi_stmt (bsi
);
2429 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2430 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2431 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2433 tree var
= gimple_call_lhs (stmt
);
2434 tree arg
= gimple_call_arg (stmt
, 0);
2435 use_operand_p use_p
;
2442 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2444 while (TREE_CODE (arg
) == SSA_NAME
)
2446 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2447 if (!is_gimple_assign (stmt_tmp
))
2449 switch (gimple_assign_rhs_code (stmt_tmp
))
2468 arg
= gimple_assign_rhs1 (stmt_tmp
);
2471 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2472 && gimple_code (use_stmt
) == GIMPLE_COND
)
2479 /* Return true when the basic blocks contains only clobbers followed by RESX.
2480 Such BBs are kept around to make removal of dead stores possible with
2481 presence of EH and will be optimized out by optimize_clobbers later in the
2484 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2485 that can be clobber only, too.. When it is false, the RESX is not necessary
2486 on the end of basic block. */
2489 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2491 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2497 if (gsi_end_p (gsi
))
2499 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2503 else if (!single_succ_p (bb
))
2506 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2508 gimple
*stmt
= gsi_stmt (gsi
);
2509 if (is_gimple_debug (stmt
))
2511 if (gimple_clobber_p (stmt
))
2513 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2518 /* See if all predecessors are either throws or clobber only BBs. */
2519 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2520 if (!(e
->flags
& EDGE_EH
)
2521 && !clobber_only_eh_bb_p (e
->src
, false))
2527 /* Return true if STMT compute a floating point expression that may be affected
2528 by -ffast-math and similar flags. */
2531 fp_expression_p (gimple
*stmt
)
2536 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2537 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2542 /* Return true if T references memory location that is local
2543 for the function (that means, dead after return) or read-only. */
2546 refs_local_or_readonly_memory_p (tree t
)
2548 /* Non-escaping memory is fine. */
2549 t
= get_base_address (t
);
2550 if ((TREE_CODE (t
) == MEM_REF
2551 || TREE_CODE (t
) == TARGET_MEM_REF
))
2552 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2554 /* Automatic variables are fine. */
2556 && auto_var_in_fn_p (t
, current_function_decl
))
2559 /* Read-only variables are fine. */
2560 if (DECL_P (t
) && TREE_READONLY (t
))
2566 /* Return true if T is a pointer pointing to memory location that is local
2567 for the function (that means, dead after return) or read-only. */
2570 points_to_local_or_readonly_memory_p (tree t
)
2572 /* See if memory location is clearly invalid. */
2573 if (integer_zerop (t
))
2574 return flag_delete_null_pointer_checks
;
2575 if (TREE_CODE (t
) == SSA_NAME
)
2577 /* For IPA passes we can consinder accesses to return slot local
2578 even if it is not local in the sense that memory is dead by
2579 the end of founction.
2580 The outer function will see a store in the call assignment
2581 and thus this will do right thing for all uses of this
2582 function in the current IPA passes (modref, pure/const discovery
2583 and inlining heuristics). */
2584 if (DECL_RESULT (current_function_decl
)
2585 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl
))
2586 && t
== ssa_default_def (cfun
, DECL_RESULT (current_function_decl
)))
2588 return !ptr_deref_may_alias_global_p (t
);
2590 if (TREE_CODE (t
) == ADDR_EXPR
)
2591 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2596 /* Analyze function body for NODE.
2597 EARLY indicates run from early optimization pipeline. */
2600 analyze_function_body (struct cgraph_node
*node
, bool early
)
2602 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2603 /* Estimate static overhead for function prologue/epilogue and alignment. */
2604 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2605 /* Benefits are scaled by probability of elimination that is in range
2608 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2610 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2611 ipa_node_params
*params_summary
2612 = early
? NULL
: ipa_node_params_sum
->get (node
);
2613 ipa_predicate bb_predicate
;
2614 struct ipa_func_body_info fbi
;
2615 vec
<ipa_predicate
> nonconstant_names
= vNULL
;
2618 gimple
*fix_builtin_expect_stmt
;
2620 gcc_assert (my_function
&& my_function
->cfg
);
2621 gcc_assert (cfun
== my_function
);
2623 memset(&fbi
, 0, sizeof(fbi
));
2624 vec_free (info
->conds
);
2626 info
->size_time_table
.release ();
2627 info
->call_size_time_table
.release ();
2629 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2630 so we can produce proper inline hints.
2632 When optimizing and analyzing for early inliner, initialize node params
2633 so we can produce correct BB predicates. */
2635 if (opt_for_fn (node
->decl
, optimize
))
2637 calculate_dominance_info (CDI_DOMINATORS
);
2638 calculate_dominance_info (CDI_POST_DOMINATORS
);
2640 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2643 ipa_check_create_node_params ();
2644 ipa_initialize_node_params (node
);
2647 if (ipa_node_params_sum
)
2650 fbi
.info
= ipa_node_params_sum
->get (node
);
2651 fbi
.bb_infos
= vNULL
;
2652 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2653 fbi
.param_count
= count_formal_params (node
->decl
);
2654 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2656 nonconstant_names
.safe_grow_cleared
2657 (SSANAMES (my_function
)->length (), true);
2662 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2663 node
->dump_name ());
2665 /* When we run into maximal number of entries, we assign everything to the
2666 constant truth case. Be sure to have it in list. */
2667 bb_predicate
= true;
2668 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2670 bb_predicate
= ipa_predicate::not_inlined ();
2671 info
->account_size_time (opt_for_fn (node
->decl
,
2672 param_uninlined_function_insns
)
2673 * ipa_fn_summary::size_scale
,
2674 opt_for_fn (node
->decl
,
2675 param_uninlined_function_time
),
2680 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2681 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2682 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2683 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2684 for (n
= 0; n
< nblocks
; n
++)
2686 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2687 freq
= bb
->count
.to_sreal_scale (entry_count
);
2688 if (clobber_only_eh_bb_p (bb
))
2690 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2691 fprintf (dump_file
, "\n Ignoring BB %i;"
2692 " it will be optimized away by cleanup_clobbers\n",
2697 /* TODO: Obviously predicates can be propagated down across CFG. */
2701 bb_predicate
= *(ipa_predicate
*)bb
->aux
;
2703 bb_predicate
= false;
2706 bb_predicate
= true;
2708 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2710 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2711 bb_predicate
.dump (dump_file
, info
->conds
);
2714 if (fbi
.info
&& nonconstant_names
.exists ())
2716 ipa_predicate phi_predicate
;
2717 bool first_phi
= true;
2719 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2723 && !phi_result_unknown_predicate (&fbi
, info
,
2730 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2732 fprintf (dump_file
, " ");
2733 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2735 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2740 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2742 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2743 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2745 gimple
*stmt
= gsi_stmt (bsi
);
2746 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2747 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2749 ipa_predicate will_be_nonconstant
;
2751 /* This relation stmt should be folded after we remove
2752 __builtin_expect call. Adjust the cost here. */
2753 if (stmt
== fix_builtin_expect_stmt
)
2759 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2761 fprintf (dump_file
, " ");
2762 print_gimple_stmt (dump_file
, stmt
, 0);
2763 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2764 freq
.to_double (), this_size
,
2768 if (is_gimple_call (stmt
)
2769 && !gimple_call_internal_p (stmt
))
2771 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2772 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2774 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2775 resolved as constant. We however don't want to optimize
2776 out the cgraph edges. */
2777 if (nonconstant_names
.exists ()
2778 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2779 && gimple_call_lhs (stmt
)
2780 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2782 ipa_predicate false_p
= false;
2783 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2786 if (ipa_node_params_sum
)
2788 int count
= gimple_call_num_args (stmt
);
2792 es
->param
.safe_grow_cleared (count
, true);
2793 for (i
= 0; i
< count
; i
++)
2795 int prob
= param_change_prob (&fbi
, stmt
, i
);
2796 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2797 es
->param
[i
].change_prob
= prob
;
2798 es
->param
[i
].points_to_local_or_readonly_memory
2799 = points_to_local_or_readonly_memory_p
2800 (gimple_call_arg (stmt
, i
));
2803 /* We cannot setup VLA parameters during inlining. */
2804 for (unsigned int i
= 0; i
< gimple_call_num_args (stmt
); ++i
)
2805 if (TREE_CODE (gimple_call_arg (stmt
, i
)) == WITH_SIZE_EXPR
)
2807 edge
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
2810 es
->call_stmt_size
= this_size
;
2811 es
->call_stmt_time
= this_time
;
2812 es
->loop_depth
= bb_loop_depth (bb
);
2813 edge_set_predicate (edge
, &bb_predicate
);
2814 if (edge
->speculative
)
2816 cgraph_edge
*indirect
2817 = edge
->speculative_call_indirect_edge ();
2818 ipa_call_summary
*es2
2819 = ipa_call_summaries
->get_create (indirect
);
2820 ipa_call_summaries
->duplicate (edge
, indirect
,
2823 /* Edge is the first direct call.
2824 create and duplicate call summaries for multiple
2825 speculative call targets. */
2826 for (cgraph_edge
*direct
2827 = edge
->next_speculative_call_target ();
2829 direct
= direct
->next_speculative_call_target ())
2831 ipa_call_summary
*es3
2832 = ipa_call_summaries
->get_create (direct
);
2833 ipa_call_summaries
->duplicate (edge
, direct
,
2839 /* TODO: When conditional jump or switch is known to be constant, but
2840 we did not translate it into the predicates, we really can account
2841 just maximum of the possible paths. */
2844 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2845 stmt
, nonconstant_names
);
2847 will_be_nonconstant
= true;
2848 if (this_time
|| this_size
)
2850 sreal final_time
= (sreal
)this_time
* freq
;
2852 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2853 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2855 "\t\t50%% will be eliminated by inlining\n");
2856 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2857 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2859 ipa_predicate p
= bb_predicate
& will_be_nonconstant
;
2861 /* We can ignore statement when we proved it is never going
2862 to happen, but we cannot do that for call statements
2863 because edges are accounted specially. */
2865 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2871 /* We account everything but the calls. Calls have their own
2872 size/time info attached to cgraph edges. This is necessary
2873 in order to make the cost disappear after inlining. */
2874 if (!is_gimple_call (stmt
))
2879 = bb_predicate
& ipa_predicate::not_inlined ();
2880 info
->account_size_time (this_size
* prob
,
2881 (final_time
* prob
) / 2, ip
,
2885 info
->account_size_time (this_size
* (2 - prob
),
2886 (final_time
* (2 - prob
) / 2),
2891 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2893 info
->fp_expressions
= true;
2895 fprintf (dump_file
, " fp_expression set\n");
2899 /* Account cost of address calculations in the statements. */
2900 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2902 for (tree op
= gimple_op (stmt
, i
);
2903 op
&& handled_component_p (op
);
2904 op
= TREE_OPERAND (op
, 0))
2905 if ((TREE_CODE (op
) == ARRAY_REF
2906 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2907 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2909 ipa_predicate p
= bb_predicate
;
2911 p
= p
& will_be_nonconstant_expr_predicate
2912 (&fbi
, info
, params_summary
,
2913 TREE_OPERAND (op
, 1),
2921 "\t\tAccounting address calculation.\n");
2922 info
->account_size_time (ipa_fn_summary::size_scale
,
2934 if (nonconstant_names
.exists () && !early
)
2936 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2938 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
2939 param_ipa_max_loop_predicates
);
2941 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2942 flow_loops_dump (dump_file
, NULL
, 0);
2944 for (auto loop
: loops_list (cfun
, 0))
2946 ipa_predicate loop_iterations
= true;
2950 class tree_niter_desc niter_desc
;
2951 if (!loop
->header
->aux
)
2954 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2955 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2957 bb_predicate
= *(ipa_predicate
*)loop
->header
->aux
;
2958 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
2959 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2960 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2961 && !is_gimple_min_invariant (niter_desc
.niter
))
2963 ipa_predicate will_be_nonconstant
2964 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2968 if (will_be_nonconstant
!= true)
2969 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2970 if (will_be_nonconstant
!= true
2971 && will_be_nonconstant
!= false)
2972 loop_iterations
&= will_be_nonconstant
;
2974 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
2975 phdr_freq
, max_loop_predicates
);
2978 /* To avoid quadratic behavior we analyze stride predicates only
2979 with respect to the containing loop. Thus we simply iterate
2980 over all defs in the outermost loop body. */
2981 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2982 loop
!= NULL
; loop
= loop
->next
)
2984 ipa_predicate loop_stride
= true;
2985 basic_block
*body
= get_loop_body (loop
);
2986 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2987 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2988 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2990 gimple_stmt_iterator gsi
;
2994 bb_predicate
= *(ipa_predicate
*)body
[i
]->aux
;
2995 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2998 gimple
*stmt
= gsi_stmt (gsi
);
3000 if (!is_gimple_assign (stmt
))
3003 tree def
= gimple_assign_lhs (stmt
);
3004 if (TREE_CODE (def
) != SSA_NAME
)
3008 if (!simple_iv (loop_containing_stmt (stmt
),
3009 loop_containing_stmt (stmt
),
3011 || is_gimple_min_invariant (iv
.step
))
3014 ipa_predicate will_be_nonconstant
3015 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3019 if (will_be_nonconstant
!= true)
3020 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3021 if (will_be_nonconstant
!= true
3022 && will_be_nonconstant
!= false)
3023 loop_stride
= loop_stride
& will_be_nonconstant
;
3026 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
3027 phdr_freq
, max_loop_predicates
);
3032 FOR_ALL_BB_FN (bb
, my_function
)
3038 edge_predicate_pool
.remove ((ipa_predicate
*)bb
->aux
);
3040 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3043 edge_predicate_pool
.remove ((ipa_predicate
*)e
->aux
);
3047 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3048 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3050 ss
->self_size
= size
;
3051 nonconstant_names
.release ();
3052 ipa_release_body_info (&fbi
);
3053 if (opt_for_fn (node
->decl
, optimize
))
3056 loop_optimizer_finalize ();
3057 else if (!ipa_edge_args_sum
)
3058 ipa_free_all_node_params ();
3059 free_dominance_info (CDI_DOMINATORS
);
3060 free_dominance_info (CDI_POST_DOMINATORS
);
3064 fprintf (dump_file
, "\n");
3065 ipa_dump_fn_summary (dump_file
, node
);
3070 /* Compute function summary.
3071 EARLY is true when we compute parameters during early opts. */
3074 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3076 HOST_WIDE_INT self_stack_size
;
3077 struct cgraph_edge
*e
;
3079 gcc_assert (!node
->inlined_to
);
3081 if (!ipa_fn_summaries
)
3082 ipa_fn_summary_alloc ();
3084 /* Create a new ipa_fn_summary. */
3085 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3086 ipa_fn_summaries
->remove (node
);
3087 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3088 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3090 /* Estimate the stack size for the function if we're optimizing. */
3091 self_stack_size
= optimize
&& !node
->thunk
3092 ? estimated_stack_frame_size (node
) : 0;
3093 size_info
->estimated_self_stack_size
= self_stack_size
;
3094 info
->estimated_stack_size
= self_stack_size
;
3098 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3099 ipa_predicate t
= true;
3101 node
->can_change_signature
= false;
3102 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3103 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3104 info
->account_size_time (ipa_fn_summary::size_scale
3105 * opt_for_fn (node
->decl
,
3106 param_uninlined_function_thunk_insns
),
3107 opt_for_fn (node
->decl
,
3108 param_uninlined_function_thunk_time
), t
, t
);
3109 t
= ipa_predicate::not_inlined ();
3110 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3111 ipa_update_overall_fn_summary (node
);
3112 size_info
->self_size
= size_info
->size
;
3113 if (stdarg_p (TREE_TYPE (node
->decl
)))
3115 info
->inlinable
= false;
3116 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3119 info
->inlinable
= true;
3123 /* Even is_gimple_min_invariant rely on current_function_decl. */
3124 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3126 /* During IPA profile merging we may be called w/o virtual SSA form
3128 update_ssa (TODO_update_ssa_only_virtuals
);
3130 /* Can this function be inlined at all? */
3131 if (!opt_for_fn (node
->decl
, optimize
)
3132 && !lookup_attribute ("always_inline",
3133 DECL_ATTRIBUTES (node
->decl
)))
3134 info
->inlinable
= false;
3136 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3138 /* Type attributes can use parameter indices to describe them. */
3139 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
3140 /* Likewise for #pragma omp declare simd functions or functions
3141 with simd attribute. */
3142 || lookup_attribute ("omp declare simd",
3143 DECL_ATTRIBUTES (node
->decl
)))
3144 node
->can_change_signature
= false;
3147 /* Otherwise, inlinable functions always can change signature. */
3148 if (info
->inlinable
)
3149 node
->can_change_signature
= true;
3152 /* Functions calling builtin_apply cannot change signature. */
3153 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3155 tree
cdecl = e
->callee
->decl
;
3156 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
3157 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
3160 node
->can_change_signature
= !e
;
3163 analyze_function_body (node
, early
);
3167 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3168 size_info
->size
= size_info
->self_size
;
3169 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3171 /* Code above should compute exactly the same result as
3172 ipa_update_overall_fn_summary except for case when speculative
3173 edges are present since these are accounted to size but not
3174 self_size. Do not compare time since different order the roundoff
3175 errors result in slight changes. */
3176 ipa_update_overall_fn_summary (node
);
3179 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3182 gcc_assert (e
|| size_info
->size
== size_info
->self_size
);
3187 /* Compute parameters of functions used by inliner using
3188 current_function_decl. */
3191 compute_fn_summary_for_current (void)
3193 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3197 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3198 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3199 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3202 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3203 int *size
, int *time
,
3204 ipa_call_arg_values
*avals
)
3207 struct cgraph_node
*callee
;
3208 class ipa_fn_summary
*isummary
;
3209 enum availability avail
;
3213 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3215 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3218 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3219 if (!target
|| speculative
)
3222 /* Account for difference in cost between indirect and direct calls. */
3223 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3224 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3225 gcc_checking_assert (*time
>= 0);
3226 gcc_checking_assert (*size
>= 0);
3228 callee
= cgraph_node::get (target
);
3229 if (!callee
|| !callee
->definition
)
3231 callee
= callee
->function_symbol (&avail
);
3232 if (avail
< AVAIL_AVAILABLE
)
3234 isummary
= ipa_fn_summaries
->get (callee
);
3235 if (isummary
== NULL
)
3238 return isummary
->inlinable
;
3241 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3242 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3243 devirtualized. AVALS, if non-NULL, describes the context of the call site
3244 as far as values of parameters are concerened. */
3247 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3248 sreal
*time
, ipa_call_arg_values
*avals
,
3251 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3252 int call_size
= es
->call_stmt_size
;
3253 int call_time
= es
->call_stmt_time
;
3256 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3257 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3258 *hints
|= INLINE_HINT_indirect_call
;
3259 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3262 *min_size
+= cur_size
;
3264 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3268 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3269 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3272 Helper for estimate_calls_size_and_time which does the same but
3273 (in most cases) faster. */
3276 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3277 int *min_size
, sreal
*time
,
3279 clause_t possible_truths
,
3280 ipa_call_arg_values
*avals
)
3282 struct cgraph_edge
*e
;
3283 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3285 if (!e
->inline_failed
)
3287 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3288 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3289 hints
, possible_truths
, avals
);
3293 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3295 /* Do not care about zero sized builtins. */
3296 if (!es
->call_stmt_size
)
3298 gcc_checking_assert (!es
->call_stmt_time
);
3302 || es
->predicate
->evaluate (possible_truths
))
3304 /* Predicates of calls shall not use NOT_CHANGED codes,
3305 so we do not need to compute probabilities. */
3306 estimate_edge_size_and_time (e
, size
,
3307 es
->predicate
? NULL
: min_size
,
3308 time
, avals
, hints
);
3311 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3313 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3315 || es
->predicate
->evaluate (possible_truths
))
3316 estimate_edge_size_and_time (e
, size
,
3317 es
->predicate
? NULL
: min_size
,
3318 time
, avals
, hints
);
3322 /* Populate sum->call_size_time_table for edges from NODE. */
3325 summarize_calls_size_and_time (struct cgraph_node
*node
,
3326 ipa_fn_summary
*sum
)
3328 struct cgraph_edge
*e
;
3329 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3331 if (!e
->inline_failed
)
3333 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3334 summarize_calls_size_and_time (e
->callee
, sum
);
3340 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3342 ipa_predicate pred
= true;
3343 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3346 pred
= *es
->predicate
;
3347 sum
->account_size_time (size
, time
, pred
, pred
, true);
3349 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3354 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3355 ipa_predicate pred
= true;
3356 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3359 pred
= *es
->predicate
;
3360 sum
->account_size_time (size
, time
, pred
, pred
, true);
3364 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3365 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3366 context of the call site. */
3369 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3370 int *min_size
, sreal
*time
,
3372 clause_t possible_truths
,
3373 ipa_call_arg_values
*avals
)
3375 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3376 bool use_table
= true;
3378 gcc_assert (node
->callees
|| node
->indirect_calls
);
3380 /* During early inlining we do not calculate info for very
3381 large functions and thus there is no need for producing
3383 if (!ipa_node_params_sum
)
3385 /* Do not calculate summaries for simple wrappers; it is waste
3387 else if (node
->callees
&& node
->indirect_calls
3388 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3390 /* If there is an indirect edge that may be optimized, we need
3391 to go the slow way. */
3392 else if (avals
&& hints
3393 && (avals
->m_known_vals
.length ()
3394 || avals
->m_known_contexts
.length ()
3395 || avals
->m_known_aggs
.length ()))
3397 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (node
);
3398 unsigned int nargs
= params_summary
3399 ? ipa_get_param_count (params_summary
) : 0;
3401 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3403 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3404 && (avals
->safe_sval_at (i
)
3405 || (avals
->m_known_aggs
.length () > i
3406 && avals
->m_known_aggs
[i
].items
.length ())))
3408 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3409 && (avals
->m_known_contexts
.length () > i
3410 && !avals
->m_known_contexts
[i
].useless_p ()))
3415 /* Fast path is via the call size time table. */
3418 /* Build summary if it is absent. */
3419 if (!sum
->call_size_time_table
.length ())
3421 ipa_predicate true_pred
= true;
3422 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3423 summarize_calls_size_and_time (node
, sum
);
3426 int old_size
= *size
;
3427 sreal old_time
= time
? *time
: 0;
3430 *min_size
+= sum
->call_size_time_table
[0].size
;
3435 /* Walk the table and account sizes and times. */
3436 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3438 if (e
->exec_predicate
.evaluate (possible_truths
))
3445 /* Be careful and see if both methods agree. */
3446 if ((flag_checking
|| dump_file
)
3447 /* Do not try to sanity check when we know we lost some
3449 && sum
->call_size_time_table
.length ()
3450 < ipa_fn_summary::max_size_time_table_size
)
3452 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3453 possible_truths
, avals
);
3454 gcc_assert (*size
== old_size
);
3455 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3457 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3458 old_time
.to_double (),
3459 time
->to_double ());
3462 /* Slow path by walking all edges. */
3464 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3465 possible_truths
, avals
);
3468 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3469 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3472 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3473 clause_t nonspec_possible_truths
,
3474 vec
<inline_param_summary
>
3475 inline_param_summary
,
3476 ipa_auto_call_arg_values
*arg_values
)
3477 : m_node (node
), m_possible_truths (possible_truths
),
3478 m_nonspec_possible_truths (nonspec_possible_truths
),
3479 m_inline_param_summary (inline_param_summary
),
3480 m_avals (arg_values
)
3484 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3487 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3489 m_node
= ctx
.m_node
;
3490 m_possible_truths
= ctx
.m_possible_truths
;
3491 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3492 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3493 unsigned int nargs
= params_summary
3494 ? ipa_get_param_count (params_summary
) : 0;
3496 m_inline_param_summary
= vNULL
;
3497 /* Copy the info only if there is at least one useful entry. */
3498 if (ctx
.m_inline_param_summary
.exists ())
3500 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3502 for (unsigned int i
= 0; i
< n
; i
++)
3503 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3504 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3506 m_inline_param_summary
3507 = ctx
.m_inline_param_summary
.copy ();
3511 m_avals
.m_known_vals
= vNULL
;
3512 if (ctx
.m_avals
.m_known_vals
.exists ())
3514 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3516 for (unsigned int i
= 0; i
< n
; i
++)
3517 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3518 && ctx
.m_avals
.m_known_vals
[i
])
3520 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3525 m_avals
.m_known_contexts
= vNULL
;
3526 if (ctx
.m_avals
.m_known_contexts
.exists ())
3528 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3530 for (unsigned int i
= 0; i
< n
; i
++)
3531 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3532 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3534 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3539 m_avals
.m_known_aggs
= vNULL
;
3540 if (ctx
.m_avals
.m_known_aggs
.exists ())
3542 unsigned int n
= MIN (ctx
.m_avals
.m_known_aggs
.length (), nargs
);
3544 for (unsigned int i
= 0; i
< n
; i
++)
3545 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3546 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3548 m_avals
.m_known_aggs
3549 = ipa_copy_agg_values (ctx
.m_avals
.m_known_aggs
);
3554 m_avals
.m_known_value_ranges
= vNULL
;
3557 /* Release memory used by known_vals/contexts/aggs vectors. and
3558 inline_param_summary. */
3561 ipa_cached_call_context::release ()
3563 /* See if context is initialized at first place. */
3566 ipa_release_agg_values (m_avals
.m_known_aggs
, true);
3567 m_avals
.m_known_vals
.release ();
3568 m_avals
.m_known_contexts
.release ();
3569 m_inline_param_summary
.release ();
3572 /* Return true if CTX describes the same call context as THIS. */
3575 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3577 if (m_node
!= ctx
.m_node
3578 || m_possible_truths
!= ctx
.m_possible_truths
3579 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3582 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3583 unsigned int nargs
= params_summary
3584 ? ipa_get_param_count (params_summary
) : 0;
3586 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3588 for (unsigned int i
= 0; i
< nargs
; i
++)
3590 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3592 if (i
>= m_inline_param_summary
.length ()
3593 || m_inline_param_summary
[i
].useless_p ())
3595 if (i
< ctx
.m_inline_param_summary
.length ()
3596 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3600 if (i
>= ctx
.m_inline_param_summary
.length ()
3601 || ctx
.m_inline_param_summary
[i
].useless_p ())
3603 if (i
< m_inline_param_summary
.length ()
3604 && !m_inline_param_summary
[i
].useless_p ())
3608 if (!m_inline_param_summary
[i
].equal_to
3609 (ctx
.m_inline_param_summary
[i
]))
3613 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3615 for (unsigned int i
= 0; i
< nargs
; i
++)
3617 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3619 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3621 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3622 && ctx
.m_avals
.m_known_vals
[i
])
3626 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3627 || !ctx
.m_avals
.m_known_vals
[i
])
3629 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3633 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3637 if (m_avals
.m_known_contexts
.exists ()
3638 || ctx
.m_avals
.m_known_contexts
.exists ())
3640 for (unsigned int i
= 0; i
< nargs
; i
++)
3642 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3644 if (i
>= m_avals
.m_known_contexts
.length ()
3645 || m_avals
.m_known_contexts
[i
].useless_p ())
3647 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3648 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3652 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3653 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3655 if (i
< m_avals
.m_known_contexts
.length ()
3656 && !m_avals
.m_known_contexts
[i
].useless_p ())
3660 if (!m_avals
.m_known_contexts
[i
].equal_to
3661 (ctx
.m_avals
.m_known_contexts
[i
]))
3665 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3667 for (unsigned int i
= 0; i
< nargs
; i
++)
3669 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3671 if (i
>= m_avals
.m_known_aggs
.length ()
3672 || m_avals
.m_known_aggs
[i
].is_empty ())
3674 if (i
< ctx
.m_avals
.m_known_aggs
.length ()
3675 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3679 if (i
>= ctx
.m_avals
.m_known_aggs
.length ()
3680 || ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3682 if (i
< m_avals
.m_known_aggs
.length ()
3683 && !m_avals
.m_known_aggs
[i
].is_empty ())
3687 if (!m_avals
.m_known_aggs
[i
].equal_to (ctx
.m_avals
.m_known_aggs
[i
]))
3694 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3695 this context. Always compute size and min_size. Only compute time and
3696 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3700 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3701 bool est_times
, bool est_hints
)
3703 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3708 ipa_hints hints
= 0;
3709 sreal loops_with_known_iterations
= 0;
3710 sreal loops_with_known_strides
= 0;
3713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3716 fprintf (dump_file
, " Estimating body: %s\n"
3717 " Known to be false: ", m_node
->dump_name ());
3719 for (i
= ipa_predicate::not_inlined_condition
;
3720 i
< (ipa_predicate::first_dynamic_condition
3721 + (int) vec_safe_length (info
->conds
)); i
++)
3722 if (!(m_possible_truths
& (1 << i
)))
3725 fprintf (dump_file
, ", ");
3727 dump_condition (dump_file
, info
->conds
, i
);
3731 if (m_node
->callees
|| m_node
->indirect_calls
)
3732 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3733 est_times
? &time
: NULL
,
3734 est_hints
? &hints
: NULL
, m_possible_truths
,
3737 sreal nonspecialized_time
= time
;
3739 min_size
+= info
->size_time_table
[0].size
;
3740 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3742 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3744 /* Because predicates are conservative, it can happen that nonconst is 1
3748 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3750 gcc_checking_assert (e
->time
>= 0);
3751 gcc_checking_assert (time
>= 0);
3753 /* We compute specialized size only because size of nonspecialized
3754 copy is context independent.
3756 The difference between nonspecialized execution and specialized is
3757 that nonspecialized is not going to have optimized out computations
3758 known to be constant in a specialized setting. */
3763 nonspecialized_time
+= e
->time
;
3766 else if (!m_inline_param_summary
.exists ())
3773 int prob
= e
->nonconst_predicate
.probability
3774 (info
->conds
, m_possible_truths
,
3775 m_inline_param_summary
);
3776 gcc_checking_assert (prob
>= 0);
3777 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3778 if (prob
== REG_BR_PROB_BASE
)
3781 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3783 gcc_checking_assert (time
>= 0);
3786 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3787 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3788 gcc_checking_assert (min_size
>= 0);
3789 gcc_checking_assert (size
>= 0);
3790 gcc_checking_assert (time
>= 0);
3791 /* nonspecialized_time should be always bigger than specialized time.
3792 Roundoff issues however may get into the way. */
3793 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3795 /* Roundoff issues may make specialized time bigger than nonspecialized
3796 time. We do not really want that to happen because some heuristics
3797 may get confused by seeing negative speedups. */
3798 if (time
> nonspecialized_time
)
3799 time
= nonspecialized_time
;
3804 hints
|= INLINE_HINT_in_scc
;
3805 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3806 hints
|= INLINE_HINT_declared_inline
;
3807 if (info
->builtin_constant_p_parms
.length ()
3808 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3809 hints
|= INLINE_HINT_builtin_constant_p
;
3811 ipa_freqcounting_predicate
*fcp
;
3812 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3813 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3815 hints
|= INLINE_HINT_loop_iterations
;
3816 loops_with_known_iterations
+= fcp
->freq
;
3818 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3820 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3821 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3823 hints
|= INLINE_HINT_loop_stride
;
3824 loops_with_known_strides
+= fcp
->freq
;
3826 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3829 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3830 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3834 fprintf (dump_file
, "\n size:%i", (int) size
);
3836 fprintf (dump_file
, " time:%f nonspec time:%f",
3837 time
.to_double (), nonspecialized_time
.to_double ());
3839 fprintf (dump_file
, " loops with known iterations:%f "
3840 "known strides:%f", loops_with_known_iterations
.to_double (),
3841 loops_with_known_strides
.to_double ());
3842 fprintf (dump_file
, "\n");
3846 estimates
->time
= time
;
3847 estimates
->nonspecialized_time
= nonspecialized_time
;
3849 estimates
->size
= size
;
3850 estimates
->min_size
= min_size
;
3852 estimates
->hints
= hints
;
3857 /* Estimate size and time needed to execute callee of EDGE assuming that
3858 parameters known to be constant at caller of EDGE are propagated.
3859 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3860 and types for parameters. */
3863 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3864 ipa_auto_call_arg_values
*avals
,
3865 ipa_call_estimates
*estimates
)
3867 clause_t clause
, nonspec_clause
;
3869 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
3871 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
3872 ctx
.estimate_size_and_time (estimates
);
3875 /* Return stack frame offset where frame of NODE is supposed to start inside
3876 of the function it is inlined to.
3877 Return 0 for functions that are not inlined. */
3880 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3882 HOST_WIDE_INT offset
= 0;
3883 if (!node
->inlined_to
)
3885 node
= node
->callers
->caller
;
3888 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3889 if (!node
->inlined_to
)
3891 node
= node
->callers
->caller
;
3896 /* Update summary information of inline clones after inlining.
3897 Compute peak stack usage. */
3900 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3902 struct cgraph_edge
*e
;
3904 ipa_propagate_frequency (node
);
3905 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3907 if (!e
->inline_failed
)
3908 inline_update_callee_summaries (e
->callee
, depth
);
3910 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3912 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3913 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3916 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3917 INLINED_EDGE has been inlined.
3919 When function A is inlined in B and A calls C with parameter that
3920 changes with probability PROB1 and C is known to be passthrough
3921 of argument if B that change with probability PROB2, the probability
3922 of change is now PROB1*PROB2. */
3925 remap_edge_params (struct cgraph_edge
*inlined_edge
,
3926 struct cgraph_edge
*edge
)
3928 if (ipa_node_params_sum
)
3931 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
3934 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3935 class ipa_call_summary
*inlined_es
3936 = ipa_call_summaries
->get (inlined_edge
);
3938 if (es
->param
.length () == 0)
3941 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3943 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3944 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3945 || jfunc
->type
== IPA_JF_ANCESTOR
)
3947 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3948 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3949 : ipa_get_jf_ancestor_formal_id (jfunc
);
3950 if (id
< (int) inlined_es
->param
.length ())
3952 int prob1
= es
->param
[i
].change_prob
;
3953 int prob2
= inlined_es
->param
[id
].change_prob
;
3954 int prob
= combine_probabilities (prob1
, prob2
);
3956 if (prob1
&& prob2
&& !prob
)
3959 es
->param
[i
].change_prob
= prob
;
3962 ->param
[id
].points_to_local_or_readonly_memory
)
3963 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3965 if (!es
->param
[i
].points_to_local_or_readonly_memory
3966 && jfunc
->type
== IPA_JF_CONST
3967 && points_to_local_or_readonly_memory_p
3968 (ipa_get_jf_constant (jfunc
)))
3969 es
->param
[i
].points_to_local_or_readonly_memory
= true;
3975 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3977 Remap predicates of callees of NODE. Rest of arguments match
3980 Also update change probabilities. */
3983 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
3984 struct cgraph_node
*node
,
3985 class ipa_fn_summary
*info
,
3986 class ipa_node_params
*params_summary
,
3987 class ipa_fn_summary
*callee_info
,
3988 const vec
<int> &operand_map
,
3989 const vec
<HOST_WIDE_INT
> &offset_map
,
3990 clause_t possible_truths
,
3991 ipa_predicate
*toplev_predicate
)
3993 struct cgraph_edge
*e
, *next
;
3994 for (e
= node
->callees
; e
; e
= next
)
3997 next
= e
->next_callee
;
3999 if (e
->inline_failed
)
4001 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4002 remap_edge_params (inlined_edge
, e
);
4006 p
= es
->predicate
->remap_after_inlining
4007 (info
, params_summary
,
4008 callee_info
, operand_map
,
4009 offset_map
, possible_truths
,
4011 edge_set_predicate (e
, &p
);
4014 edge_set_predicate (e
, toplev_predicate
);
4017 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
4018 params_summary
, callee_info
,
4019 operand_map
, offset_map
, possible_truths
,
4022 for (e
= node
->indirect_calls
; e
; e
= next
)
4024 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4026 next
= e
->next_callee
;
4028 remap_edge_params (inlined_edge
, e
);
4031 p
= es
->predicate
->remap_after_inlining
4032 (info
, params_summary
,
4033 callee_info
, operand_map
, offset_map
,
4034 possible_truths
, *toplev_predicate
);
4035 edge_set_predicate (e
, &p
);
4038 edge_set_predicate (e
, toplev_predicate
);
4042 /* Run remap_after_inlining on each predicate in V. */
4045 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4046 class ipa_node_params
*params_summary
,
4047 class ipa_fn_summary
*callee_info
,
4048 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4049 const vec
<int> &operand_map
,
4050 const vec
<HOST_WIDE_INT
> &offset_map
,
4051 clause_t possible_truths
,
4052 ipa_predicate
*toplev_predicate
)
4055 ipa_freqcounting_predicate
*fcp
;
4056 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4059 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4060 callee_info
, operand_map
,
4061 offset_map
, possible_truths
,
4063 if (p
!= false && p
!= true)
4064 *fcp
->predicate
&= p
;
4068 /* We inlined EDGE. Update summary of the function we inlined into. */
4071 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4073 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4074 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4075 ? edge
->caller
->inlined_to
: edge
->caller
);
4076 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4077 clause_t clause
= 0; /* not_inline is known to be false. */
4079 auto_vec
<int, 8> operand_map
;
4080 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4082 ipa_predicate toplev_predicate
;
4083 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4084 ipa_node_params
*params_summary
= (ipa_node_params_sum
4085 ? ipa_node_params_sum
->get (to
) : NULL
);
4088 toplev_predicate
= *es
->predicate
;
4090 toplev_predicate
= true;
4092 info
->fp_expressions
|= callee_info
->fp_expressions
;
4094 if (callee_info
->conds
)
4096 ipa_auto_call_arg_values avals
;
4097 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4099 if (ipa_node_params_sum
&& callee_info
->conds
)
4101 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4102 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4107 operand_map
.safe_grow_cleared (count
, true);
4108 offset_map
.safe_grow_cleared (count
, true);
4110 for (i
= 0; i
< count
; i
++)
4112 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4115 /* TODO: handle non-NOPs when merging. */
4116 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4118 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4119 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4120 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4123 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4125 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4126 if (offset
>= 0 && offset
< INT_MAX
)
4128 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4129 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4131 offset_map
[i
] = offset
;
4134 operand_map
[i
] = map
;
4135 gcc_assert (map
< ipa_get_param_count (params_summary
));
4139 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4140 if (ip
< count
&& operand_map
[ip
] >= 0)
4141 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4143 sreal freq
= edge
->sreal_frequency ();
4144 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4147 p
= e
->exec_predicate
.remap_after_inlining
4148 (info
, params_summary
,
4149 callee_info
, operand_map
,
4152 ipa_predicate nonconstp
;
4153 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4154 (info
, params_summary
,
4155 callee_info
, operand_map
,
4158 if (p
!= false && nonconstp
!= false)
4160 sreal add_time
= ((sreal
)e
->time
* freq
);
4161 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4163 if (prob
!= REG_BR_PROB_BASE
)
4164 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4165 if (prob
!= REG_BR_PROB_BASE
4166 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4168 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4169 (double) prob
/ REG_BR_PROB_BASE
);
4171 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4174 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4175 callee_info
, operand_map
,
4176 offset_map
, clause
, &toplev_predicate
);
4177 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4178 info
->loop_iterations
, operand_map
,
4179 offset_map
, clause
, &toplev_predicate
);
4180 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4181 info
->loop_strides
, operand_map
,
4182 offset_map
, clause
, &toplev_predicate
);
4184 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4185 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4187 if (info
->estimated_stack_size
< peak
)
4188 info
->estimated_stack_size
= peak
;
4190 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4191 if (info
->call_size_time_table
.length ())
4194 sreal edge_time
= 0;
4196 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4197 /* Unaccount size and time of the optimized out call. */
4198 info
->account_size_time (-edge_size
, -edge_time
,
4199 es
->predicate
? *es
->predicate
: true,
4200 es
->predicate
? *es
->predicate
: true,
4202 /* Account new calls. */
4203 summarize_calls_size_and_time (edge
->callee
, info
);
4206 /* Free summaries that are not maintained for inline clones/edges. */
4207 ipa_call_summaries
->remove (edge
);
4208 ipa_fn_summaries
->remove (edge
->callee
);
4209 ipa_remove_from_growth_caches (edge
);
4212 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4213 overall size and time. Recompute it.
4214 If RESET is true also recompute call_time_size_table. */
4217 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4219 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4220 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4224 size_info
->size
= 0;
4226 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4228 size_info
->size
+= e
->size
;
4229 info
->time
+= e
->time
;
4231 info
->min_size
= info
->size_time_table
[0].size
;
4233 info
->call_size_time_table
.release ();
4234 if (node
->callees
|| node
->indirect_calls
)
4235 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4237 ~(clause_t
) (1 << ipa_predicate::false_condition
),
4239 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4240 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4244 /* This function performs intraprocedural analysis in NODE that is required to
4245 inline indirect calls. */
4248 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4250 ipa_analyze_node (node
);
4251 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4253 ipa_print_node_params (dump_file
, node
);
4254 ipa_print_node_jump_functions (dump_file
, node
);
4259 /* Note function body size. */
4262 inline_analyze_function (struct cgraph_node
*node
)
4264 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4267 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4268 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4269 inline_indirect_intraprocedural_analysis (node
);
4270 compute_fn_summary (node
, false);
4273 struct cgraph_edge
*e
;
4274 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4275 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4276 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4277 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4284 /* Called when new function is inserted to callgraph late. */
4287 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4289 inline_analyze_function (node
);
4292 /* Note function body size. */
4295 ipa_fn_summary_generate (void)
4297 struct cgraph_node
*node
;
4299 FOR_EACH_DEFINED_FUNCTION (node
)
4300 if (DECL_STRUCT_FUNCTION (node
->decl
))
4301 node
->versionable
= tree_versionable_function_p (node
->decl
);
4303 ipa_fn_summary_alloc ();
4305 ipa_fn_summaries
->enable_insertion_hook ();
4307 ipa_register_cgraph_hooks ();
4309 FOR_EACH_DEFINED_FUNCTION (node
)
4311 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4312 || opt_for_fn (node
->decl
, optimize
)))
4313 inline_analyze_function (node
);
4317 /* Write inline summary for edge E to OB. */
4320 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4323 class ipa_call_summary
*es
= prevails
4324 ? ipa_call_summaries
->get_create (e
) : NULL
;
4328 int size
= streamer_read_uhwi (ib
);
4329 int time
= streamer_read_uhwi (ib
);
4330 int depth
= streamer_read_uhwi (ib
);
4334 es
->call_stmt_size
= size
;
4335 es
->call_stmt_time
= time
;
4336 es
->loop_depth
= depth
;
4339 bitpack_d bp
= streamer_read_bitpack (ib
);
4341 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4343 bp_unpack_value (&bp
, 1);
4347 edge_set_predicate (e
, &p
);
4348 length
= streamer_read_uhwi (ib
);
4350 && (e
->possibly_call_in_translation_unit_p ()
4351 /* Also stream in jump functions to builtins in hope that they
4352 will get fnspecs. */
4353 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4355 es
->param
.safe_grow_cleared (length
, true);
4356 for (i
= 0; i
< length
; i
++)
4358 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4359 es
->param
[i
].points_to_local_or_readonly_memory
4360 = streamer_read_uhwi (ib
);
4365 for (i
= 0; i
< length
; i
++)
4367 streamer_read_uhwi (ib
);
4368 streamer_read_uhwi (ib
);
4374 /* Stream in inline summaries from the section. */
4377 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4380 const struct lto_function_header
*header
=
4381 (const struct lto_function_header
*) data
;
4382 const int cfg_offset
= sizeof (struct lto_function_header
);
4383 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4384 const int string_offset
= main_offset
+ header
->main_size
;
4385 class data_in
*data_in
;
4386 unsigned int i
, count2
, j
;
4387 unsigned int f_count
;
4389 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4390 file_data
->mode_table
);
4393 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4394 header
->string_size
, vNULL
);
4395 f_count
= streamer_read_uhwi (&ib
);
4396 for (i
= 0; i
< f_count
; i
++)
4399 struct cgraph_node
*node
;
4400 class ipa_fn_summary
*info
;
4401 class ipa_node_params
*params_summary
;
4402 class ipa_size_summary
*size_info
;
4403 lto_symtab_encoder_t encoder
;
4404 struct bitpack_d bp
;
4405 struct cgraph_edge
*e
;
4408 index
= streamer_read_uhwi (&ib
);
4409 encoder
= file_data
->symtab_node_encoder
;
4410 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4412 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4413 params_summary
= node
->prevailing_p ()
4414 ? ipa_node_params_sum
->get (node
) : NULL
;
4415 size_info
= node
->prevailing_p ()
4416 ? ipa_size_summaries
->get_create (node
) : NULL
;
4418 int stack_size
= streamer_read_uhwi (&ib
);
4419 int size
= streamer_read_uhwi (&ib
);
4420 sreal time
= sreal::stream_in (&ib
);
4424 info
->estimated_stack_size
4425 = size_info
->estimated_self_stack_size
= stack_size
;
4426 size_info
->size
= size_info
->self_size
= size
;
4430 bp
= streamer_read_bitpack (&ib
);
4433 info
->inlinable
= bp_unpack_value (&bp
, 1);
4434 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4438 bp_unpack_value (&bp
, 1);
4439 bp_unpack_value (&bp
, 1);
4442 count2
= streamer_read_uhwi (&ib
);
4443 gcc_assert (!info
|| !info
->conds
);
4445 vec_safe_reserve_exact (info
->conds
, count2
);
4446 for (j
= 0; j
< count2
; j
++)
4449 unsigned int k
, count3
;
4450 c
.operand_num
= streamer_read_uhwi (&ib
);
4451 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4452 c
.type
= stream_read_tree (&ib
, data_in
);
4453 c
.val
= stream_read_tree (&ib
, data_in
);
4454 bp
= streamer_read_bitpack (&ib
);
4455 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4456 c
.by_ref
= bp_unpack_value (&bp
, 1);
4458 c
.offset
= streamer_read_uhwi (&ib
);
4459 count3
= streamer_read_uhwi (&ib
);
4462 vec_safe_reserve_exact (c
.param_ops
, count3
);
4464 ipa_set_param_used_by_ipa_predicates
4465 (params_summary
, c
.operand_num
, true);
4466 for (k
= 0; k
< count3
; k
++)
4468 struct expr_eval_op op
;
4469 enum gimple_rhs_class rhs_class
;
4470 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4471 op
.type
= stream_read_tree (&ib
, data_in
);
4472 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4474 case GIMPLE_UNARY_RHS
:
4476 op
.val
[0] = NULL_TREE
;
4477 op
.val
[1] = NULL_TREE
;
4480 case GIMPLE_BINARY_RHS
:
4481 case GIMPLE_TERNARY_RHS
:
4482 bp
= streamer_read_bitpack (&ib
);
4483 op
.index
= bp_unpack_value (&bp
, 2);
4484 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4485 if (rhs_class
== GIMPLE_BINARY_RHS
)
4486 op
.val
[1] = NULL_TREE
;
4488 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4492 fatal_error (UNKNOWN_LOCATION
,
4493 "invalid fnsummary in LTO stream");
4496 c
.param_ops
->quick_push (op
);
4499 info
->conds
->quick_push (c
);
4501 count2
= streamer_read_uhwi (&ib
);
4502 gcc_assert (!info
|| !info
->size_time_table
.length ());
4504 info
->size_time_table
.reserve_exact (count2
);
4505 for (j
= 0; j
< count2
; j
++)
4507 class size_time_entry e
;
4509 e
.size
= streamer_read_uhwi (&ib
);
4510 e
.time
= sreal::stream_in (&ib
);
4511 e
.exec_predicate
.stream_in (&ib
);
4512 e
.nonconst_predicate
.stream_in (&ib
);
4515 info
->size_time_table
.quick_push (e
);
4518 count2
= streamer_read_uhwi (&ib
);
4519 for (j
= 0; j
< count2
; j
++)
4522 sreal fcp_freq
= sreal::stream_in (&ib
);
4525 ipa_freqcounting_predicate fcp
;
4526 fcp
.predicate
= NULL
;
4527 set_hint_predicate (&fcp
.predicate
, p
);
4528 fcp
.freq
= fcp_freq
;
4529 vec_safe_push (info
->loop_iterations
, fcp
);
4532 count2
= streamer_read_uhwi (&ib
);
4533 for (j
= 0; j
< count2
; j
++)
4536 sreal fcp_freq
= sreal::stream_in (&ib
);
4539 ipa_freqcounting_predicate fcp
;
4540 fcp
.predicate
= NULL
;
4541 set_hint_predicate (&fcp
.predicate
, p
);
4542 fcp
.freq
= fcp_freq
;
4543 vec_safe_push (info
->loop_strides
, fcp
);
4546 count2
= streamer_read_uhwi (&ib
);
4548 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4549 for (j
= 0; j
< count2
; j
++)
4551 int parm
= streamer_read_uhwi (&ib
);
4553 info
->builtin_constant_p_parms
.quick_push (parm
);
4555 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4556 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4557 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4558 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4561 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4563 lto_data_in_delete (data_in
);
4567 /* Read inline summary. Jump functions are shared among ipa-cp
4568 and inliner, so when ipa-cp is active, we don't need to write them
4572 ipa_fn_summary_read (void)
4574 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4575 struct lto_file_decl_data
*file_data
;
4578 ipa_prop_read_jump_functions ();
4579 ipa_fn_summary_alloc ();
4581 while ((file_data
= file_data_vec
[j
++]))
4585 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4588 inline_read_section (file_data
, data
, len
);
4590 /* Fatal error here. We do not want to support compiling ltrans units
4591 with different version of compiler or different flags than the WPA
4592 unit, so this should never happen. */
4593 fatal_error (input_location
,
4594 "ipa inline summary is missing in input file");
4596 ipa_register_cgraph_hooks ();
4598 gcc_assert (ipa_fn_summaries
);
4599 ipa_fn_summaries
->enable_insertion_hook ();
4603 /* Write inline summary for edge E to OB. */
4606 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4608 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4611 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4612 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4613 streamer_write_uhwi (ob
, es
->loop_depth
);
4615 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4616 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4617 streamer_write_bitpack (&bp
);
4620 es
->predicate
->stream_out (ob
);
4622 streamer_write_uhwi (ob
, 0);
4623 streamer_write_uhwi (ob
, es
->param
.length ());
4624 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4626 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4627 streamer_write_uhwi (ob
, es
->param
[i
].points_to_local_or_readonly_memory
);
4632 /* Write inline summary for node in SET.
4633 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4634 active, we don't need to write them twice. */
4637 ipa_fn_summary_write (void)
4639 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4640 lto_symtab_encoder_iterator lsei
;
4641 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4642 unsigned int count
= 0;
4644 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4645 lsei_next_function_in_partition (&lsei
))
4647 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4648 if (cnode
->definition
&& !cnode
->alias
)
4651 streamer_write_uhwi (ob
, count
);
4653 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4654 lsei_next_function_in_partition (&lsei
))
4656 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4657 if (cnode
->definition
&& !cnode
->alias
)
4659 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4660 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4661 struct bitpack_d bp
;
4662 struct cgraph_edge
*edge
;
4665 struct condition
*c
;
4667 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4668 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4669 streamer_write_hwi (ob
, size_info
->self_size
);
4670 info
->time
.stream_out (ob
);
4671 bp
= bitpack_create (ob
->main_stream
);
4672 bp_pack_value (&bp
, info
->inlinable
, 1);
4673 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4674 streamer_write_bitpack (&bp
);
4675 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4676 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4679 struct expr_eval_op
*op
;
4681 streamer_write_uhwi (ob
, c
->operand_num
);
4682 streamer_write_uhwi (ob
, c
->code
);
4683 stream_write_tree (ob
, c
->type
, true);
4684 stream_write_tree (ob
, c
->val
, true);
4685 bp
= bitpack_create (ob
->main_stream
);
4686 bp_pack_value (&bp
, c
->agg_contents
, 1);
4687 bp_pack_value (&bp
, c
->by_ref
, 1);
4688 streamer_write_bitpack (&bp
);
4689 if (c
->agg_contents
)
4690 streamer_write_uhwi (ob
, c
->offset
);
4691 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4692 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4694 streamer_write_uhwi (ob
, op
->code
);
4695 stream_write_tree (ob
, op
->type
, true);
4698 bp
= bitpack_create (ob
->main_stream
);
4699 bp_pack_value (&bp
, op
->index
, 2);
4700 streamer_write_bitpack (&bp
);
4701 stream_write_tree (ob
, op
->val
[0], true);
4703 stream_write_tree (ob
, op
->val
[1], true);
4707 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4708 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4710 streamer_write_uhwi (ob
, e
->size
);
4711 e
->time
.stream_out (ob
);
4712 e
->exec_predicate
.stream_out (ob
);
4713 e
->nonconst_predicate
.stream_out (ob
);
4715 ipa_freqcounting_predicate
*fcp
;
4716 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4717 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4719 fcp
->predicate
->stream_out (ob
);
4720 fcp
->freq
.stream_out (ob
);
4722 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4723 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4725 fcp
->predicate
->stream_out (ob
);
4726 fcp
->freq
.stream_out (ob
);
4728 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4730 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4732 streamer_write_uhwi (ob
, ip
);
4733 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4734 write_ipa_call_summary (ob
, edge
);
4735 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4736 write_ipa_call_summary (ob
, edge
);
4739 streamer_write_char_stream (ob
->main_stream
, 0);
4740 produce_asm (ob
, NULL
);
4741 destroy_output_block (ob
);
4743 ipa_prop_write_jump_functions ();
4747 /* Release function summary. */
4750 ipa_free_fn_summary (void)
4752 if (!ipa_call_summaries
)
4754 ggc_delete (ipa_fn_summaries
);
4755 ipa_fn_summaries
= NULL
;
4756 delete ipa_call_summaries
;
4757 ipa_call_summaries
= NULL
;
4758 edge_predicate_pool
.release ();
4759 /* During IPA this is one of largest datastructures to release. */
4764 /* Release function summary. */
4767 ipa_free_size_summary (void)
4769 if (!ipa_size_summaries
)
4771 delete ipa_size_summaries
;
4772 ipa_size_summaries
= NULL
;
4777 const pass_data pass_data_local_fn_summary
=
4779 GIMPLE_PASS
, /* type */
4780 "local-fnsummary", /* name */
4781 OPTGROUP_INLINE
, /* optinfo_flags */
4782 TV_INLINE_PARAMETERS
, /* tv_id */
4783 0, /* properties_required */
4784 0, /* properties_provided */
4785 0, /* properties_destroyed */
4786 0, /* todo_flags_start */
4787 0, /* todo_flags_finish */
4790 class pass_local_fn_summary
: public gimple_opt_pass
4793 pass_local_fn_summary (gcc::context
*ctxt
)
4794 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4797 /* opt_pass methods: */
4798 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
4799 virtual unsigned int execute (function
*)
4801 return compute_fn_summary_for_current ();
4804 }; // class pass_local_fn_summary
4809 make_pass_local_fn_summary (gcc::context
*ctxt
)
4811 return new pass_local_fn_summary (ctxt
);
4815 /* Free inline summary. */
4819 const pass_data pass_data_ipa_free_fn_summary
=
4821 SIMPLE_IPA_PASS
, /* type */
4822 "free-fnsummary", /* name */
4823 OPTGROUP_NONE
, /* optinfo_flags */
4824 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4825 0, /* properties_required */
4826 0, /* properties_provided */
4827 0, /* properties_destroyed */
4828 0, /* todo_flags_start */
4829 0, /* todo_flags_finish */
4832 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
4835 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4836 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
4840 /* opt_pass methods: */
4841 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
4842 void set_pass_param (unsigned int n
, bool param
)
4844 gcc_assert (n
== 0);
4847 virtual bool gate (function
*) { return true; }
4848 virtual unsigned int execute (function
*)
4850 ipa_free_fn_summary ();
4851 /* Free ipa-prop structures if they are no longer needed. */
4852 ipa_free_all_structures_after_iinln ();
4854 ipa_free_size_summary ();
4860 }; // class pass_ipa_free_fn_summary
4864 simple_ipa_opt_pass
*
4865 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4867 return new pass_ipa_free_fn_summary (ctxt
);
4872 const pass_data pass_data_ipa_fn_summary
=
4874 IPA_PASS
, /* type */
4875 "fnsummary", /* name */
4876 OPTGROUP_INLINE
, /* optinfo_flags */
4877 TV_IPA_FNSUMMARY
, /* tv_id */
4878 0, /* properties_required */
4879 0, /* properties_provided */
4880 0, /* properties_destroyed */
4881 0, /* todo_flags_start */
4882 ( TODO_dump_symtab
), /* todo_flags_finish */
4885 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4888 pass_ipa_fn_summary (gcc::context
*ctxt
)
4889 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4890 ipa_fn_summary_generate
, /* generate_summary */
4891 ipa_fn_summary_write
, /* write_summary */
4892 ipa_fn_summary_read
, /* read_summary */
4893 NULL
, /* write_optimization_summary */
4894 NULL
, /* read_optimization_summary */
4895 NULL
, /* stmt_fixup */
4896 0, /* function_transform_todo_flags_start */
4897 NULL
, /* function_transform */
4898 NULL
) /* variable_transform */
4901 /* opt_pass methods: */
4902 virtual unsigned int execute (function
*) { return 0; }
4904 }; // class pass_ipa_fn_summary
4909 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4911 return new pass_ipa_fn_summary (ctxt
);
4914 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4915 within the same process. For use by toplev::finalize. */
4918 ipa_fnsummary_c_finalize (void)
4920 ipa_free_fn_summary ();