1 /* Function summary pass.
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
65 #include "tree-streamer.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-iterator.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
79 #include "ipa-fnsummary.h"
81 #include "tree-scalar-evolution.h"
82 #include "ipa-utils.h"
83 #include "cfgexpand.h"
85 #include "stringpool.h"
87 #include "tree-into-ssa.h"
88 #include "symtab-clones.h"
89 #include "gimple-range.h"
93 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
94 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
95 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
97 /* Edge predicates goes here. */
98 static object_allocator
<ipa_predicate
> edge_predicate_pool ("edge predicates");
101 /* Dump IPA hints. */
103 ipa_dump_hints (FILE *f
, ipa_hints hints
)
107 fprintf (f
, "IPA hints:");
108 if (hints
& INLINE_HINT_indirect_call
)
110 hints
&= ~INLINE_HINT_indirect_call
;
111 fprintf (f
, " indirect_call");
113 if (hints
& INLINE_HINT_loop_iterations
)
115 hints
&= ~INLINE_HINT_loop_iterations
;
116 fprintf (f
, " loop_iterations");
118 if (hints
& INLINE_HINT_loop_stride
)
120 hints
&= ~INLINE_HINT_loop_stride
;
121 fprintf (f
, " loop_stride");
123 if (hints
& INLINE_HINT_same_scc
)
125 hints
&= ~INLINE_HINT_same_scc
;
126 fprintf (f
, " same_scc");
128 if (hints
& INLINE_HINT_in_scc
)
130 hints
&= ~INLINE_HINT_in_scc
;
131 fprintf (f
, " in_scc");
133 if (hints
& INLINE_HINT_cross_module
)
135 hints
&= ~INLINE_HINT_cross_module
;
136 fprintf (f
, " cross_module");
138 if (hints
& INLINE_HINT_declared_inline
)
140 hints
&= ~INLINE_HINT_declared_inline
;
141 fprintf (f
, " declared_inline");
143 if (hints
& INLINE_HINT_known_hot
)
145 hints
&= ~INLINE_HINT_known_hot
;
146 fprintf (f
, " known_hot");
148 if (hints
& INLINE_HINT_builtin_constant_p
)
150 hints
&= ~INLINE_HINT_builtin_constant_p
;
151 fprintf (f
, " builtin_constant_p");
157 /* Record SIZE and TIME to SUMMARY.
158 The accounted code will be executed when EXEC_PRED is true.
159 When NONCONST_PRED is false the code will evaluate to constant and
160 will get optimized out in specialized clones of the function.
161 If CALL is true account to call_size_time_table rather than
165 ipa_fn_summary::account_size_time (int size
, sreal time
,
166 const ipa_predicate
&exec_pred
,
167 const ipa_predicate
&nonconst_pred_in
,
173 ipa_predicate nonconst_pred
;
174 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
176 if (exec_pred
== false)
179 nonconst_pred
= nonconst_pred_in
& exec_pred
;
181 if (nonconst_pred
== false)
184 /* We need to create initial empty unconditional clause, but otherwise
185 we don't need to account empty times and sizes. */
186 if (!size
&& time
== 0 && table
->length ())
189 /* Only for calls we are unaccounting what we previously recorded. */
190 gcc_checking_assert (time
>= 0 || call
);
192 for (i
= 0; table
->iterate (i
, &e
); i
++)
193 if (e
->exec_predicate
== exec_pred
194 && e
->nonconst_predicate
== nonconst_pred
)
199 if (i
== max_size_time_table_size
)
204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
206 "\t\tReached limit on number of entries, "
207 "ignoring the predicate.");
209 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
212 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
213 ((double) size
) / ipa_fn_summary::size_scale
,
214 (time
.to_double ()), found
? "" : "new ");
215 exec_pred
.dump (dump_file
, conds
, 0);
216 if (exec_pred
!= nonconst_pred
)
218 fprintf (dump_file
, " nonconst:");
219 nonconst_pred
.dump (dump_file
, conds
);
222 fprintf (dump_file
, "\n");
226 class size_time_entry new_entry
;
227 new_entry
.size
= size
;
228 new_entry
.time
= time
;
229 new_entry
.exec_predicate
= exec_pred
;
230 new_entry
.nonconst_predicate
= nonconst_pred
;
232 call_size_time_table
.safe_push (new_entry
);
234 size_time_table
.safe_push (new_entry
);
240 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
241 /* Tolerate small roundoff issues. */
247 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
249 static struct cgraph_edge
*
250 redirect_to_unreachable (struct cgraph_edge
*e
)
252 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
253 struct cgraph_node
*target
= cgraph_node::get_create
254 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
257 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
259 e
= cgraph_edge::make_direct (e
, target
);
261 e
->redirect_callee (target
);
262 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
263 e
->inline_failed
= CIF_UNREACHABLE
;
264 e
->count
= profile_count::zero ();
265 es
->call_stmt_size
= 0;
266 es
->call_stmt_time
= 0;
268 callee
->remove_symbol_and_inline_clones ();
272 /* Set predicate for edge E. */
275 edge_set_predicate (struct cgraph_edge
*e
, ipa_predicate
*predicate
)
277 /* If the edge is determined to be never executed, redirect it
278 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
280 if (predicate
&& *predicate
== false
281 /* When handling speculative edges, we need to do the redirection
282 just once. Do it always on the direct edge, so we do not
283 attempt to resolve speculation while duplicating the edge. */
284 && (!e
->speculative
|| e
->callee
))
285 e
= redirect_to_unreachable (e
);
287 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
288 if (predicate
&& *predicate
!= true)
291 es
->predicate
= edge_predicate_pool
.allocate ();
292 *es
->predicate
= *predicate
;
297 edge_predicate_pool
.remove (es
->predicate
);
298 es
->predicate
= NULL
;
302 /* Set predicate for hint *P. */
305 set_hint_predicate (ipa_predicate
**p
, ipa_predicate new_predicate
)
307 if (new_predicate
== false || new_predicate
== true)
310 edge_predicate_pool
.remove (*p
);
316 *p
= edge_predicate_pool
.allocate ();
321 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
322 Otherwise add a new item to the vector with this predicate and frerq equal
323 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
324 in which case the function does nothing. */
327 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
328 const ipa_predicate
&new_predicate
, sreal add_freq
,
329 unsigned max_num_predicates
)
331 if (new_predicate
== false || new_predicate
== true)
333 ipa_freqcounting_predicate
*f
;
334 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
335 if (new_predicate
== f
->predicate
)
340 if (vec_safe_length (*v
) >= max_num_predicates
)
341 /* Too many different predicates to account for. */
344 ipa_freqcounting_predicate fcp
;
345 fcp
.predicate
= NULL
;
346 set_hint_predicate (&fcp
.predicate
, new_predicate
);
348 vec_safe_push (*v
, fcp
);
352 /* Compute what conditions may or may not hold given information about
353 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
354 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
355 copy when called in a given context. It is a bitmask of conditions. Bit
356 0 means that condition is known to be false, while bit 1 means that condition
357 may or may not be true. These differs - for example NOT_INLINED condition
358 is always false in the second and also builtin_constant_p tests cannot use
359 the fact that parameter is indeed a constant.
361 When INLINE_P is true, assume that we are inlining. AVAL contains known
362 information about argument values. The function does not modify its content
363 and so AVALs could also be of type ipa_call_arg_values but so far all
364 callers work with the auto version and so we avoid the conversion for
367 ERROR_MARK value of an argument means compile time invariant. */
370 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
372 ipa_auto_call_arg_values
*avals
,
373 clause_t
*ret_clause
,
374 clause_t
*ret_nonspec_clause
)
376 clause_t clause
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
377 clause_t nonspec_clause
= 1 << ipa_predicate::not_inlined_condition
;
378 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
382 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
387 struct expr_eval_op
*op
;
389 /* We allow call stmt to have fewer arguments than the callee function
390 (especially for K&R style programs). So bound check here (we assume
391 m_known_aggs vector is either empty or has the same length as
393 gcc_checking_assert (!avals
->m_known_aggs
.length ()
394 || !avals
->m_known_vals
.length ()
395 || (avals
->m_known_vals
.length ()
396 == avals
->m_known_aggs
.length ()));
400 if (c
->code
== ipa_predicate::changed
402 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
405 if (ipa_agg_value_set
*agg
= avals
->safe_aggval_at (c
->operand_num
))
407 tree sval
= avals
->safe_sval_at (c
->operand_num
);
408 val
= ipa_find_agg_cst_for_param (agg
, sval
, c
->offset
,
416 val
= avals
->safe_sval_at (c
->operand_num
);
417 if (val
&& val
== error_mark_node
418 && c
->code
!= ipa_predicate::changed
)
423 && (c
->code
== ipa_predicate::changed
424 || c
->code
== ipa_predicate::is_not_constant
))
426 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
427 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
430 if (c
->code
== ipa_predicate::changed
)
432 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
436 if (c
->code
== ipa_predicate::is_not_constant
)
438 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
442 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
444 if (c
->type
!= TREE_TYPE (val
))
445 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
446 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
451 val
= fold_unary (op
->code
, op
->type
, val
);
452 else if (!op
->val
[1])
453 val
= fold_binary (op
->code
, op
->type
,
454 op
->index
? op
->val
[0] : val
,
455 op
->index
? val
: op
->val
[0]);
456 else if (op
->index
== 0)
457 val
= fold_ternary (op
->code
, op
->type
,
458 val
, op
->val
[0], op
->val
[1]);
459 else if (op
->index
== 1)
460 val
= fold_ternary (op
->code
, op
->type
,
461 op
->val
[0], val
, op
->val
[1]);
462 else if (op
->index
== 2)
463 val
= fold_ternary (op
->code
, op
->type
,
464 op
->val
[0], op
->val
[1], val
);
470 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
473 if (res
&& integer_zerop (res
))
475 if (res
&& integer_onep (res
))
477 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
479 |= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
483 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
485 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
487 value_range vr
= avals
->m_known_value_ranges
[c
->operand_num
];
488 if (!vr
.undefined_p ()
490 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
492 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
495 range_fold_unary_expr (&res
, NOP_EXPR
,
496 c
->type
, &vr
, vr
.type ());
501 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
503 if (vr
.varying_p () || vr
.undefined_p ())
508 range_fold_unary_expr (&res
, op
->code
, op
->type
, &vr
, type
);
509 else if (!op
->val
[1])
511 value_range
op0 (op
->val
[0], op
->val
[0]);
512 range_fold_binary_expr (&res
, op
->code
, op
->type
,
513 op
->index
? &op0
: &vr
,
514 op
->index
? &vr
: &op0
);
517 res
.set_varying (op
->type
);
521 if (!vr
.varying_p () && !vr
.undefined_p ())
524 value_range
val_vr (c
->val
, c
->val
);
525 range_fold_binary_expr (&res
, c
->code
, boolean_type_node
,
534 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
535 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
537 *ret_clause
= clause
;
538 if (ret_nonspec_clause
)
539 *ret_nonspec_clause
= nonspec_clause
;
542 /* Return true if VRP will be exectued on the function.
543 We do not want to anticipate optimizations that will not happen.
545 FIXME: This can be confused with -fdisable and debug counters and thus
546 it should not be used for correctness (only to make heuristics work).
547 This means that inliner should do its own optimizations of expressions
548 that it predicts to be constant so wrong code can not be triggered by
549 builtin_constant_p. */
552 vrp_will_run_p (struct cgraph_node
*node
)
554 return (opt_for_fn (node
->decl
, optimize
)
555 && !opt_for_fn (node
->decl
, optimize_debug
)
556 && opt_for_fn (node
->decl
, flag_tree_vrp
));
559 /* Similarly about FRE. */
562 fre_will_run_p (struct cgraph_node
*node
)
564 return (opt_for_fn (node
->decl
, optimize
)
565 && !opt_for_fn (node
->decl
, optimize_debug
)
566 && opt_for_fn (node
->decl
, flag_tree_fre
));
569 /* Work out what conditions might be true at invocation of E.
570 Compute costs for inlined edge if INLINE_P is true.
572 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
573 (if non-NULL) conditions evaluated for nonspecialized clone called
576 Vectors in AVALS will be populated with useful known information about
577 argument values - information not known to have any uses will be omitted -
578 except for m_known_contexts which will only be calculated if
579 COMPUTE_CONTEXTS is true. */
582 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
583 clause_t
*clause_ptr
,
584 clause_t
*nonspec_clause_ptr
,
585 ipa_auto_call_arg_values
*avals
,
586 bool compute_contexts
)
588 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
589 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
590 class ipa_edge_args
*args
;
593 *clause_ptr
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
595 if (ipa_node_params_sum
596 && !e
->call_stmt_cannot_inline_p
597 && (info
->conds
|| compute_contexts
)
598 && (args
= ipa_edge_args_sum
->get (e
)) != NULL
)
600 struct cgraph_node
*caller
;
601 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
602 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
603 int i
, count
= ipa_get_cs_argument_count (args
);
607 if (e
->caller
->inlined_to
)
608 caller
= e
->caller
->inlined_to
;
611 caller_parms_info
= ipa_node_params_sum
->get (caller
);
612 callee_pi
= ipa_node_params_sum
->get (callee
);
614 /* Watch for thunks. */
616 /* Watch for variadic functions. */
617 count
= MIN (count
, ipa_get_param_count (callee_pi
));
621 for (i
= 0; i
< count
; i
++)
623 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
625 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
626 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
628 /* Determine if we know constant value of the parameter. */
629 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
630 ipa_get_type (callee_pi
, i
));
632 if (!cst
&& e
->call_stmt
633 && i
< (int)gimple_call_num_args (e
->call_stmt
))
635 cst
= gimple_call_arg (e
->call_stmt
, i
);
636 if (!is_gimple_min_invariant (cst
))
641 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
642 if (!avals
->m_known_vals
.length ())
643 avals
->m_known_vals
.safe_grow_cleared (count
, true);
644 avals
->m_known_vals
[i
] = cst
;
646 else if (inline_p
&& !es
->param
[i
].change_prob
)
648 if (!avals
->m_known_vals
.length ())
649 avals
->m_known_vals
.safe_grow_cleared (count
, true);
650 avals
->m_known_vals
[i
] = error_mark_node
;
653 /* If we failed to get simple constant, try value range. */
654 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
655 && vrp_will_run_p (caller
)
656 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
659 = ipa_value_range_from_jfunc (caller_parms_info
, e
, jf
,
660 ipa_get_type (callee_pi
,
662 if (!vr
.undefined_p () && !vr
.varying_p ())
664 if (!avals
->m_known_value_ranges
.length ())
666 avals
->m_known_value_ranges
.safe_grow (count
, true);
667 for (int i
= 0; i
< count
; ++i
)
668 new (&avals
->m_known_value_ranges
[i
])
671 avals
->m_known_value_ranges
[i
] = vr
;
675 /* Determine known aggregate values. */
676 if (fre_will_run_p (caller
))
678 ipa_agg_value_set agg
679 = ipa_agg_value_set_from_jfunc (caller_parms_info
,
681 if (agg
.items
.length ())
683 if (!avals
->m_known_aggs
.length ())
684 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
685 avals
->m_known_aggs
[i
] = agg
;
690 /* For calls used in polymorphic calls we further determine
691 polymorphic call context. */
693 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
695 ipa_polymorphic_call_context
696 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
697 if (!ctx
.useless_p ())
699 if (!avals
->m_known_contexts
.length ())
700 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
701 avals
->m_known_contexts
[i
]
702 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
707 gcc_assert (!count
|| callee
->thunk
);
709 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
711 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
713 for (i
= 0; i
< count
; i
++)
715 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
716 if (!is_gimple_min_invariant (cst
))
720 if (!avals
->m_known_vals
.length ())
721 avals
->m_known_vals
.safe_grow_cleared (count
, true);
722 avals
->m_known_vals
[i
] = cst
;
727 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
732 /* Allocate the function summary. */
735 ipa_fn_summary_alloc (void)
737 gcc_checking_assert (!ipa_fn_summaries
);
738 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
739 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
740 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
743 ipa_call_summary::~ipa_call_summary ()
746 edge_predicate_pool
.remove (predicate
);
751 ipa_fn_summary::~ipa_fn_summary ()
753 unsigned len
= vec_safe_length (loop_iterations
);
754 for (unsigned i
= 0; i
< len
; i
++)
755 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
756 len
= vec_safe_length (loop_strides
);
757 for (unsigned i
= 0; i
< len
; i
++)
758 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
760 call_size_time_table
.release ();
761 vec_free (loop_iterations
);
762 vec_free (loop_strides
);
763 builtin_constant_p_parms
.release ();
767 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
770 for (e
= node
->callees
; e
; e
= e
->next_callee
)
771 ipa_call_summaries
->remove (e
);
772 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
773 ipa_call_summaries
->remove (e
);
776 /* Duplicate predicates in loop hint vector, allocating memory for them and
777 remove and deallocate any uninteresting (true or false) ones. Return the
780 static vec
<ipa_freqcounting_predicate
, va_gc
> *
781 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
782 clause_t possible_truths
)
784 if (vec_safe_length (v
) == 0)
787 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
788 int len
= res
->length();
789 for (int i
= len
- 1; i
>= 0; i
--)
791 ipa_predicate new_predicate
792 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
793 /* We do not want to free previous predicate; it is used by node
795 (*res
)[i
].predicate
= NULL
;
796 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
798 if (!(*res
)[i
].predicate
)
799 res
->unordered_remove (i
);
806 /* Hook that is called by cgraph.cc when a node is duplicated. */
808 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
810 ipa_fn_summary
*src_info
,
811 ipa_fn_summary
*info
)
813 new (info
) ipa_fn_summary (*src_info
);
814 /* TODO: as an optimization, we may avoid copying conditions
815 that are known to be false or true. */
816 info
->conds
= vec_safe_copy (info
->conds
);
818 clone_info
*cinfo
= clone_info::get (dst
);
819 /* When there are any replacements in the function body, see if we can figure
820 out that something was optimized out. */
821 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
823 /* Use SRC parm info since it may not be copied yet. */
824 ipa_node_params
*parms_info
= ipa_node_params_sum
->get (src
);
825 ipa_auto_call_arg_values avals
;
826 int count
= ipa_get_param_count (parms_info
);
828 clause_t possible_truths
;
829 ipa_predicate true_pred
= true;
831 int optimized_out_size
= 0;
832 bool inlined_to_p
= false;
833 struct cgraph_edge
*edge
, *next
;
835 info
->size_time_table
.release ();
836 avals
.m_known_vals
.safe_grow_cleared (count
, true);
837 for (i
= 0; i
< count
; i
++)
839 struct ipa_replace_map
*r
;
841 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
843 if (r
->parm_num
== i
)
845 avals
.m_known_vals
[i
] = r
->new_tree
;
850 evaluate_conditions_for_known_args (dst
, false,
853 /* We are going to specialize,
854 so ignore nonspec truths. */
857 info
->account_size_time (0, 0, true_pred
, true_pred
);
859 /* Remap size_time vectors.
860 Simplify the predicate by pruning out alternatives that are known
862 TODO: as on optimization, we can also eliminate conditions known
864 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
866 ipa_predicate new_exec_pred
;
867 ipa_predicate new_nonconst_pred
;
868 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
870 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
872 if (new_exec_pred
== false || new_nonconst_pred
== false)
873 optimized_out_size
+= e
->size
;
875 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
879 /* Remap edge predicates with the same simplification as above.
880 Also copy constantness arrays. */
881 for (edge
= dst
->callees
; edge
; edge
= next
)
883 ipa_predicate new_predicate
;
884 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
885 next
= edge
->next_callee
;
887 if (!edge
->inline_failed
)
891 new_predicate
= es
->predicate
->remap_after_duplication
893 if (new_predicate
== false && *es
->predicate
!= false)
894 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
895 edge_set_predicate (edge
, &new_predicate
);
898 /* Remap indirect edge predicates with the same simplification as above.
899 Also copy constantness arrays. */
900 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
902 ipa_predicate new_predicate
;
903 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
904 next
= edge
->next_callee
;
906 gcc_checking_assert (edge
->inline_failed
);
909 new_predicate
= es
->predicate
->remap_after_duplication
911 if (new_predicate
== false && *es
->predicate
!= false)
913 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
914 edge_set_predicate (edge
, &new_predicate
);
916 info
->loop_iterations
917 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
920 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
922 if (info
->builtin_constant_p_parms
.length())
924 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
926 info
->builtin_constant_p_parms
= vNULL
;
927 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
928 if (!avals
.m_known_vals
[ip
])
929 info
->builtin_constant_p_parms
.safe_push (ip
);
932 /* If inliner or someone after inliner will ever start producing
933 non-trivial clones, we will get trouble with lack of information
934 about updating self sizes, because size vectors already contains
935 sizes of the callees. */
936 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
940 info
->size_time_table
= src_info
->size_time_table
.copy ();
941 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
942 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
944 info
->builtin_constant_p_parms
945 = info
->builtin_constant_p_parms
.copy ();
947 ipa_freqcounting_predicate
*f
;
948 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
950 ipa_predicate p
= *f
->predicate
;
952 set_hint_predicate (&f
->predicate
, p
);
954 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
956 ipa_predicate p
= *f
->predicate
;
958 set_hint_predicate (&f
->predicate
, p
);
961 if (!dst
->inlined_to
)
962 ipa_update_overall_fn_summary (dst
);
966 /* Hook that is called by cgraph.cc when a node is duplicated. */
969 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
970 struct cgraph_edge
*dst
,
971 class ipa_call_summary
*srcinfo
,
972 class ipa_call_summary
*info
)
974 new (info
) ipa_call_summary (*srcinfo
);
975 info
->predicate
= NULL
;
976 edge_set_predicate (dst
, srcinfo
->predicate
);
977 info
->param
= srcinfo
->param
.copy ();
978 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
980 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
981 - eni_size_weights
.call_cost
);
982 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
983 - eni_time_weights
.call_cost
);
987 /* Dump edge summaries associated to NODE and recursively to all clones.
991 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
992 class ipa_fn_summary
*info
)
994 struct cgraph_edge
*edge
;
995 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
997 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
998 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
1002 "%*s%s %s\n%*s freq:%4.2f",
1003 indent
, "", callee
->dump_name (),
1004 !edge
->inline_failed
1005 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1006 indent
, "", edge
->sreal_frequency ().to_double ());
1008 if (cross_module_call_p (edge
))
1009 fprintf (f
, " cross module");
1012 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1013 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1015 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1016 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1018 fprintf (f
, " callee size:%2i stack:%2i",
1019 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1020 (int) s
->estimated_stack_size
);
1022 if (es
&& es
->predicate
)
1024 fprintf (f
, " predicate: ");
1025 es
->predicate
->dump (f
, info
->conds
);
1029 if (es
&& es
->param
.exists ())
1030 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1032 int prob
= es
->param
[i
].change_prob
;
1035 fprintf (f
, "%*s op%i is compile time invariant\n",
1037 else if (prob
!= REG_BR_PROB_BASE
)
1038 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1039 prob
* 100.0 / REG_BR_PROB_BASE
);
1040 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1041 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1044 if (!edge
->inline_failed
)
1046 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1047 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1049 (int) ipa_get_stack_frame_offset (callee
),
1050 (int) ss
->estimated_self_stack_size
);
1051 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1054 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1056 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1057 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1061 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1062 es
->call_stmt_time
);
1065 fprintf (f
, "predicate: ");
1066 es
->predicate
->dump (f
, info
->conds
);
1075 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1077 if (node
->definition
)
1079 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1080 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1085 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1086 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1087 fprintf (f
, " always_inline");
1089 fprintf (f
, " inlinable");
1090 if (s
->fp_expressions
)
1091 fprintf (f
, " fp_expression");
1092 if (s
->builtin_constant_p_parms
.length ())
1094 fprintf (f
, " builtin_constant_p_parms");
1095 for (unsigned int i
= 0;
1096 i
< s
->builtin_constant_p_parms
.length (); i
++)
1097 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1099 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1100 fprintf (f
, " self size: %i\n", ss
->self_size
);
1101 fprintf (f
, " global size: %i\n", ss
->size
);
1102 fprintf (f
, " min size: %i\n", s
->min_size
);
1103 fprintf (f
, " self stack: %i\n",
1104 (int) ss
->estimated_self_stack_size
);
1105 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1107 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1109 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1110 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1112 fprintf (f
, " size:%f, time:%f",
1113 (double) e
->size
/ ipa_fn_summary::size_scale
,
1114 e
->time
.to_double ());
1115 if (e
->exec_predicate
!= true)
1117 fprintf (f
, ", executed if:");
1118 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1120 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1122 fprintf (f
, ", nonconst if:");
1123 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1127 ipa_freqcounting_predicate
*fcp
;
1128 bool first_fcp
= true;
1129 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1133 fprintf (f
, " loop iterations:");
1136 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1137 fcp
->predicate
->dump (f
, s
->conds
);
1140 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1144 fprintf (f
, " loop strides:");
1147 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1148 fcp
->predicate
->dump (f
, s
->conds
);
1150 fprintf (f
, " calls:\n");
1151 dump_ipa_call_summary (f
, 4, node
, s
);
1154 fprintf (f
, " target_info: %x\n", s
->target_info
);
1157 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
1162 ipa_debug_fn_summary (struct cgraph_node
*node
)
1164 ipa_dump_fn_summary (stderr
, node
);
1168 ipa_dump_fn_summaries (FILE *f
)
1170 struct cgraph_node
*node
;
1172 FOR_EACH_DEFINED_FUNCTION (node
)
1173 if (!node
->inlined_to
)
1174 ipa_dump_fn_summary (f
, node
);
1177 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1178 boolean variable pointed to by DATA. */
1181 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1184 bool *b
= (bool *) data
;
1189 /* If OP refers to value of function parameter, return the corresponding
1190 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1191 PARM_DECL) will be stored to *SIZE_P in that case too. */
1194 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1197 /* SSA_NAME referring to parm default def? */
1198 if (TREE_CODE (op
) == SSA_NAME
1199 && SSA_NAME_IS_DEFAULT_DEF (op
)
1200 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1203 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1204 return SSA_NAME_VAR (op
);
1206 /* Non-SSA parm reference? */
1207 if (TREE_CODE (op
) == PARM_DECL
1208 && fbi
->aa_walk_budget
> 0)
1210 bool modified
= false;
1213 ao_ref_init (&refd
, op
);
1214 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1215 mark_modified
, &modified
, NULL
, NULL
,
1216 fbi
->aa_walk_budget
);
1219 fbi
->aa_walk_budget
= 0;
1222 fbi
->aa_walk_budget
-= walked
;
1226 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1233 /* If OP refers to value of function parameter, return the corresponding
1234 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1235 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1236 stored to *SIZE_P in that case too. */
1239 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1242 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1246 if (TREE_CODE (op
) == SSA_NAME
1247 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1248 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1249 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1250 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1255 /* If OP refers to a value of a function parameter or value loaded from an
1256 aggregate passed to a parameter (either by value or reference), return TRUE
1257 and store the number of the parameter to *INDEX_P, the access size into
1258 *SIZE_P, and information whether and how it has been loaded from an
1259 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1260 statement in which OP is used or loaded. */
1263 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1264 gimple
*stmt
, tree op
, int *index_p
,
1266 struct agg_position_info
*aggpos
)
1268 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1270 gcc_checking_assert (aggpos
);
1273 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1276 aggpos
->agg_contents
= false;
1277 aggpos
->by_ref
= false;
1281 if (TREE_CODE (op
) == SSA_NAME
)
1283 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1284 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1286 stmt
= SSA_NAME_DEF_STMT (op
);
1287 op
= gimple_assign_rhs1 (stmt
);
1288 if (!REFERENCE_CLASS_P (op
))
1289 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1293 aggpos
->agg_contents
= true;
1294 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1295 stmt
, op
, index_p
, &aggpos
->offset
,
1296 size_p
, &aggpos
->by_ref
);
1299 /* See if statement might disappear after inlining.
1300 0 - means not eliminated
1301 1 - half of statements goes away
1302 2 - for sure it is eliminated.
1303 We are not terribly sophisticated, basically looking for simple abstraction
1304 penalty wrappers. */
1307 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1309 enum gimple_code code
= gimple_code (stmt
);
1310 enum tree_code rhs_code
;
1320 if (gimple_num_ops (stmt
) != 2)
1323 rhs_code
= gimple_assign_rhs_code (stmt
);
1325 /* Casts of parameters, loads from parameters passed by reference
1326 and stores to return value or parameters are often free after
1327 inlining due to SRA and further combining.
1328 Assume that half of statements goes away. */
1329 if (CONVERT_EXPR_CODE_P (rhs_code
)
1330 || rhs_code
== VIEW_CONVERT_EXPR
1331 || rhs_code
== ADDR_EXPR
1332 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1334 tree rhs
= gimple_assign_rhs1 (stmt
);
1335 tree lhs
= gimple_assign_lhs (stmt
);
1336 tree inner_rhs
= get_base_address (rhs
);
1337 tree inner_lhs
= get_base_address (lhs
);
1338 bool rhs_free
= false;
1339 bool lhs_free
= false;
1346 /* Reads of parameter are expected to be free. */
1347 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1349 /* Match expressions of form &this->field. Those will most likely
1350 combine with something upstream after inlining. */
1351 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1353 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1354 if (TREE_CODE (op
) == PARM_DECL
)
1356 else if (TREE_CODE (op
) == MEM_REF
1357 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1362 /* When parameter is not SSA register because its address is taken
1363 and it is just copied into one, the statement will be completely
1364 free after inlining (we will copy propagate backward). */
1365 if (rhs_free
&& is_gimple_reg (lhs
))
1368 /* Reads of parameters passed by reference
1369 expected to be free (i.e. optimized out after inlining). */
1370 if (TREE_CODE (inner_rhs
) == MEM_REF
1371 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1374 /* Copying parameter passed by reference into gimple register is
1375 probably also going to copy propagate, but we can't be quite
1377 if (rhs_free
&& is_gimple_reg (lhs
))
1380 /* Writes to parameters, parameters passed by value and return value
1381 (either directly or passed via invisible reference) are free.
1383 TODO: We ought to handle testcase like
1384 struct a {int a,b;};
1392 This translate into:
1407 For that we either need to copy ipa-split logic detecting writes
1409 if (TREE_CODE (inner_lhs
) == PARM_DECL
1410 || TREE_CODE (inner_lhs
) == RESULT_DECL
1411 || (TREE_CODE (inner_lhs
) == MEM_REF
1412 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1414 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1415 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1416 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1418 0))) == RESULT_DECL
))))
1421 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1423 if (lhs_free
&& rhs_free
)
1432 /* Analyze EXPR if it represents a series of simple operations performed on
1433 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1434 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1435 Type of the parameter or load from an aggregate via the parameter is
1436 stored in *TYPE_P. Operations on the parameter are recorded to
1437 PARAM_OPS_P if it is not NULL. */
1440 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1441 gimple
*stmt
, tree expr
,
1442 int *index_p
, tree
*type_p
,
1443 struct agg_position_info
*aggpos
,
1444 expr_eval_ops
*param_ops_p
= NULL
)
1446 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1450 *param_ops_p
= NULL
;
1454 expr_eval_op eval_op
;
1456 unsigned cst_count
= 0;
1458 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1461 tree type
= TREE_TYPE (expr
);
1463 if (aggpos
->agg_contents
)
1465 /* Stop if containing bit-field. */
1466 if (TREE_CODE (expr
) == BIT_FIELD_REF
1467 || contains_bitfld_component_ref_p (expr
))
1475 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1478 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1481 switch (gimple_assign_rhs_class (stmt
))
1483 case GIMPLE_SINGLE_RHS
:
1484 expr
= gimple_assign_rhs1 (stmt
);
1487 case GIMPLE_UNARY_RHS
:
1491 case GIMPLE_BINARY_RHS
:
1495 case GIMPLE_TERNARY_RHS
:
1503 /* Stop if expression is too complex. */
1504 if (op_count
++ == op_limit
)
1509 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1510 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1511 eval_op
.val
[0] = NULL_TREE
;
1512 eval_op
.val
[1] = NULL_TREE
;
1516 for (unsigned i
= 0; i
< rhs_count
; i
++)
1518 tree op
= gimple_op (stmt
, i
+ 1);
1520 gcc_assert (op
&& !TYPE_P (op
));
1521 if (is_gimple_ip_invariant (op
))
1523 if (++cst_count
== rhs_count
)
1526 eval_op
.val
[cst_count
- 1] = op
;
1530 /* Found a non-constant operand, and record its index in rhs
1537 /* Found more than one non-constant operands. */
1543 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1546 /* Failed to decompose, free resource and return. */
1549 vec_free (*param_ops_p
);
1554 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1557 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1561 /* Avoid duplicates. */
1562 for (unsigned int i
= 0;
1563 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1566 summary
->builtin_constant_p_parms
.safe_push (parm
);
1569 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1570 predicates to the CFG edges. */
1573 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1574 class ipa_fn_summary
*summary
,
1575 class ipa_node_params
*params_summary
,
1581 struct agg_position_info aggpos
;
1582 enum tree_code code
, inverted_code
;
1587 expr_eval_ops param_ops
;
1589 last
= last_stmt (bb
);
1590 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1592 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1594 op
= gimple_cond_lhs (last
);
1596 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1599 code
= gimple_cond_code (last
);
1600 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1602 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1604 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1605 ? code
: inverted_code
);
1606 /* invert_tree_comparison will return ERROR_MARK on FP
1607 comparisons that are not EQ/NE instead of returning proper
1608 unordered one. Be sure it is not confused with NON_CONSTANT.
1610 And if the edge's target is the final block of diamond CFG graph
1611 of this conditional statement, we do not need to compute
1612 predicate for the edge because the final block's predicate must
1613 be at least as that of the first block of the statement. */
1614 if (this_code
!= ERROR_MARK
1615 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1618 = add_condition (summary
, params_summary
, index
,
1619 param_type
, &aggpos
,
1620 this_code
, gimple_cond_rhs (last
), param_ops
);
1621 e
->aux
= edge_predicate_pool
.allocate ();
1622 *(ipa_predicate
*) e
->aux
= p
;
1625 vec_free (param_ops
);
1628 if (TREE_CODE (op
) != SSA_NAME
)
1631 if (builtin_constant_p (op))
1635 Here we can predicate nonconstant_code. We can't
1636 really handle constant_code since we have no predicate
1637 for this and also the constant code is not known to be
1638 optimized away when inliner doesn't see operand is constant.
1639 Other optimizers might think otherwise. */
1640 if (gimple_cond_code (last
) != NE_EXPR
1641 || !integer_zerop (gimple_cond_rhs (last
)))
1643 set_stmt
= SSA_NAME_DEF_STMT (op
);
1644 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1645 || gimple_call_num_args (set_stmt
) != 1)
1647 op2
= gimple_call_arg (set_stmt
, 0);
1648 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1651 add_builtin_constant_p_parm (summary
, index
);
1652 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1654 ipa_predicate p
= add_condition (summary
, params_summary
, index
,
1655 param_type
, &aggpos
,
1656 ipa_predicate::is_not_constant
, NULL_TREE
);
1657 e
->aux
= edge_predicate_pool
.allocate ();
1658 *(ipa_predicate
*) e
->aux
= p
;
1663 /* If BB ends by a switch we can turn into predicates, attach corresponding
1664 predicates to the CFG edges. */
1667 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1668 class ipa_fn_summary
*summary
,
1669 class ipa_node_params
*params_summary
,
1675 struct agg_position_info aggpos
;
1681 expr_eval_ops param_ops
;
1683 lastg
= last_stmt (bb
);
1684 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1686 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1687 op
= gimple_switch_index (last
);
1688 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1692 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1693 tree type
= TREE_TYPE (op
);
1694 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1695 param_ipa_max_switch_predicate_bounds
);
1696 int bound_count
= 0;
1699 get_range_query (cfun
)->range_of_expr (vr
, op
);
1700 if (vr
.undefined_p ())
1701 vr
.set_varying (TREE_TYPE (op
));
1702 value_range_kind vr_type
= vr
.kind ();
1703 wide_int vr_wmin
= wi::to_wide (vr
.min ());
1704 wide_int vr_wmax
= wi::to_wide (vr
.max ());
1706 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1708 e
->aux
= edge_predicate_pool
.allocate ();
1709 *(ipa_predicate
*) e
->aux
= false;
1712 e
= gimple_switch_edge (cfun
, last
, 0);
1713 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1714 default case if its target basic block is in convergence point of all
1715 switch cases, which can be determined by checking whether it
1716 post-dominates the switch statement. */
1717 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1718 bound_count
= INT_MAX
;
1720 n
= gimple_switch_num_labels (last
);
1721 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1723 tree cl
= gimple_switch_label (last
, case_idx
);
1724 tree min
= CASE_LOW (cl
);
1725 tree max
= CASE_HIGH (cl
);
1728 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1730 /* The case value might not have same type as switch expression,
1731 extend the value based on the expression type. */
1732 if (TREE_TYPE (min
) != type
)
1733 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1737 else if (TREE_TYPE (max
) != type
)
1738 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1740 /* The case's target basic block is in convergence point of all switch
1741 cases, its predicate should be at least as that of the switch
1743 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1745 else if (min
== max
)
1746 p
= add_condition (summary
, params_summary
, index
, param_type
,
1747 &aggpos
, EQ_EXPR
, min
, param_ops
);
1750 ipa_predicate p1
, p2
;
1751 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1752 &aggpos
, GE_EXPR
, min
, param_ops
);
1753 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1754 &aggpos
, LE_EXPR
, max
, param_ops
);
1757 *(ipa_predicate
*) e
->aux
1758 = p
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1760 /* If there are too many disjoint case ranges, predicate for default
1761 case might become too complicated. So add a limit here. */
1762 if (bound_count
> bound_limit
)
1765 bool new_range
= true;
1767 if (!ranges
.is_empty ())
1769 wide_int curr_wmin
= wi::to_wide (min
);
1770 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1772 /* Merge case ranges if they are continuous. */
1773 if (curr_wmin
== last_wmax
+ 1)
1775 else if (vr_type
== VR_ANTI_RANGE
)
1777 /* If two disjoint case ranges can be connected by anti-range
1778 of switch index, combine them to one range. */
1779 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1780 vr_type
= VR_UNDEFINED
;
1781 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1786 /* Create/extend a case range. And we count endpoints of range set,
1787 this number nearly equals to number of conditions that we will create
1788 for predicate of default case. */
1791 bound_count
+= (min
== max
) ? 1 : 2;
1792 ranges
.safe_push (std::make_pair (min
, max
));
1796 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1797 ranges
.last ().second
= max
;
1801 e
= gimple_switch_edge (cfun
, last
, 0);
1802 if (bound_count
> bound_limit
)
1804 *(ipa_predicate
*) e
->aux
= true;
1805 vec_free (param_ops
);
1809 ipa_predicate p_seg
= true;
1810 ipa_predicate p_all
= false;
1812 if (vr_type
!= VR_RANGE
)
1814 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1815 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1818 /* Construct predicate to represent default range set that is negation of
1819 all case ranges. Case range is classified as containing single/non-single
1820 values. Suppose a piece of case ranges in the following.
1822 [D1...D2] [S1] ... [Sn] [D3...D4]
1824 To represent default case's range sets between two non-single value
1825 case ranges (From D2 to D3), we construct predicate as:
1827 D2 < x < D3 && x != S1 && ... && x != Sn
1829 for (size_t i
= 0; i
< ranges
.length (); i
++)
1831 tree min
= ranges
[i
].first
;
1832 tree max
= ranges
[i
].second
;
1835 p_seg
&= add_condition (summary
, params_summary
, index
,
1836 param_type
, &aggpos
, NE_EXPR
,
1840 /* Do not create sub-predicate for range that is beyond low bound
1842 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1844 p_seg
&= add_condition (summary
, params_summary
, index
,
1845 param_type
, &aggpos
,
1846 LT_EXPR
, min
, param_ops
);
1847 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1850 /* Do not create sub-predicate for range that is beyond up bound
1852 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1858 p_seg
= add_condition (summary
, params_summary
, index
,
1859 param_type
, &aggpos
, GT_EXPR
,
1864 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1865 *(ipa_predicate
*) e
->aux
1866 = p_all
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1868 vec_free (param_ops
);
1872 /* For each BB in NODE attach to its AUX pointer predicate under
1873 which it is executable. */
1876 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1877 struct cgraph_node
*node
,
1878 class ipa_fn_summary
*summary
,
1879 class ipa_node_params
*params_summary
)
1881 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1885 FOR_EACH_BB_FN (bb
, my_function
)
1887 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1888 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1891 /* Entry block is always executable. */
1892 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1893 = edge_predicate_pool
.allocate ();
1894 *(ipa_predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1896 /* A simple dataflow propagation of predicates forward in the CFG.
1897 TODO: work in reverse postorder. */
1901 FOR_EACH_BB_FN (bb
, my_function
)
1903 ipa_predicate p
= false;
1906 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1910 ipa_predicate this_bb_predicate
1911 = *(ipa_predicate
*) e
->src
->aux
;
1913 this_bb_predicate
&= (*(ipa_predicate
*) e
->aux
);
1914 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1921 basic_block pdom_bb
;
1926 bb
->aux
= edge_predicate_pool
.allocate ();
1927 *((ipa_predicate
*) bb
->aux
) = p
;
1929 else if (p
!= *(ipa_predicate
*) bb
->aux
)
1931 /* This OR operation is needed to ensure monotonous data flow
1932 in the case we hit the limit on number of clauses and the
1933 and/or operations above give approximate answers. */
1934 p
= p
.or_with (summary
->conds
, *(ipa_predicate
*)bb
->aux
);
1935 if (p
!= *(ipa_predicate
*)bb
->aux
)
1938 *((ipa_predicate
*)bb
->aux
) = p
;
1942 /* For switch/if statement, we can OR-combine predicates of all
1943 its cases/branches to get predicate for basic block in their
1944 convergence point, but sometimes this will generate very
1945 complicated predicate. Actually, we can get simplified
1946 predicate in another way by using the fact that predicate
1947 for a basic block must also hold true for its post dominators.
1948 To be specific, basic block in convergence point of
1949 conditional statement should include predicate of the
1951 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1952 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1954 else if (!pdom_bb
->aux
)
1957 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1958 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
1960 else if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
1962 p
= p
.or_with (summary
->conds
,
1963 *(ipa_predicate
*)pdom_bb
->aux
);
1964 if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
1967 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
1976 /* Return predicate specifying when the STMT might have result that is not
1977 a compile time constant. */
1979 static ipa_predicate
1980 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1981 class ipa_fn_summary
*summary
,
1982 class ipa_node_params
*params_summary
,
1984 vec
<ipa_predicate
> nonconstant_names
)
1989 while (UNARY_CLASS_P (expr
))
1990 expr
= TREE_OPERAND (expr
, 0);
1992 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1993 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1994 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
1995 ipa_predicate::changed
, NULL_TREE
);
1996 if (is_gimple_min_invariant (expr
))
1998 if (TREE_CODE (expr
) == SSA_NAME
)
1999 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
2000 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
2003 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2005 TREE_OPERAND (expr
, 0),
2011 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2013 TREE_OPERAND (expr
, 1),
2015 return p1
.or_with (summary
->conds
, p2
);
2017 else if (TREE_CODE (expr
) == COND_EXPR
)
2020 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2022 TREE_OPERAND (expr
, 0),
2028 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2030 TREE_OPERAND (expr
, 1),
2034 p1
= p1
.or_with (summary
->conds
, p2
);
2035 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2037 TREE_OPERAND (expr
, 2),
2039 return p2
.or_with (summary
->conds
, p1
);
2041 else if (TREE_CODE (expr
) == CALL_EXPR
)
2051 /* Return predicate specifying when the STMT might have result that is not
2052 a compile time constant. */
2054 static ipa_predicate
2055 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2056 class ipa_fn_summary
*summary
,
2057 class ipa_node_params
*params_summary
,
2059 vec
<ipa_predicate
> nonconstant_names
)
2061 ipa_predicate p
= true;
2064 tree param_type
= NULL_TREE
;
2065 ipa_predicate op_non_const
;
2068 struct agg_position_info aggpos
;
2070 /* What statements might be optimized away
2071 when their arguments are constant. */
2072 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2073 && gimple_code (stmt
) != GIMPLE_COND
2074 && gimple_code (stmt
) != GIMPLE_SWITCH
2075 && (gimple_code (stmt
) != GIMPLE_CALL
2076 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2079 /* Stores will stay anyway. */
2080 if (gimple_store_p (stmt
))
2083 is_load
= gimple_assign_load_p (stmt
);
2085 /* Loads can be optimized when the value is known. */
2088 tree op
= gimple_assign_rhs1 (stmt
);
2089 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2096 /* See if we understand all operands before we start
2097 adding conditionals. */
2098 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2100 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2101 /* For arguments we can build a condition. */
2102 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2104 if (TREE_CODE (use
) != SSA_NAME
)
2106 /* If we know when operand is constant,
2107 we still can say something useful. */
2108 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2115 add_condition (summary
, params_summary
,
2116 base_index
, param_type
, &aggpos
,
2117 ipa_predicate::changed
, NULL_TREE
);
2119 op_non_const
= false;
2120 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2122 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2125 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2127 if (index
!= base_index
)
2128 p
= add_condition (summary
, params_summary
, index
,
2129 TREE_TYPE (parm
), NULL
,
2130 ipa_predicate::changed
, NULL_TREE
);
2135 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2136 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2138 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2139 && gimple_op (stmt
, 0)
2140 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2141 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2143 return op_non_const
;
2146 struct record_modified_bb_info
2153 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2154 probability how often it changes between USE_BB.
2155 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2156 is in different loop nest, we can do better.
2157 This is all just estimate. In theory we look for minimal cut separating
2158 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2162 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2164 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2165 if (l
&& l
->header
->count
< init_bb
->count
)
2170 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2171 set except for info->stmt. */
2174 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2176 struct record_modified_bb_info
*info
=
2177 (struct record_modified_bb_info
*) data
;
2178 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2180 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2182 bitmap_set_bit (info
->bb_set
,
2183 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2184 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2186 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2187 gimple_bb (info
->stmt
))->index
);
2190 fprintf (dump_file
, " Param ");
2191 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2192 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2193 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2195 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2196 gimple_bb (info
->stmt
))->index
);
2197 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2202 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2203 will change since last invocation of STMT.
2205 Value 0 is reserved for compile time invariants.
2206 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2207 ought to be REG_BR_PROB_BASE / estimated_iters. */
2210 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2212 tree op
= gimple_call_arg (stmt
, i
);
2213 basic_block bb
= gimple_bb (stmt
);
2215 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2216 op
= TREE_OPERAND (op
, 0);
2218 tree base
= get_base_address (op
);
2220 /* Global invariants never change. */
2221 if (is_gimple_min_invariant (base
))
2224 /* We would have to do non-trivial analysis to really work out what
2225 is the probability of value to change (i.e. when init statement
2226 is in a sibling loop of the call).
2228 We do an conservative estimate: when call is executed N times more often
2229 than the statement defining value, we take the frequency 1/N. */
2230 if (TREE_CODE (base
) == SSA_NAME
)
2232 profile_count init_count
;
2234 if (!bb
->count
.nonzero_p ())
2235 return REG_BR_PROB_BASE
;
2237 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2238 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2240 init_count
= get_minimal_bb
2241 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2242 gimple_bb (stmt
))->count
;
2244 if (init_count
< bb
->count
)
2245 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2246 * REG_BR_PROB_BASE
).to_int (), 1);
2247 return REG_BR_PROB_BASE
;
2252 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2253 struct record_modified_bb_info info
;
2254 tree init
= ctor_for_folding (base
);
2256 if (init
!= error_mark_node
)
2258 if (!bb
->count
.nonzero_p () || fbi
->aa_walk_budget
== 0)
2259 return REG_BR_PROB_BASE
;
2262 fprintf (dump_file
, " Analyzing param change probability of ");
2263 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2264 fprintf (dump_file
, "\n");
2266 ao_ref_init (&refd
, op
);
2269 info
.bb_set
= BITMAP_ALLOC (NULL
);
2271 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2272 NULL
, NULL
, fbi
->aa_walk_budget
);
2274 fbi
->aa_walk_budget
-= walked
;
2275 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2278 fbi
->aa_walk_budget
= 0;
2282 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2284 fprintf (dump_file
, " Set in same BB as used.\n");
2286 BITMAP_FREE (info
.bb_set
);
2287 return REG_BR_PROB_BASE
;
2292 /* Lookup the most frequent update of the value and believe that
2293 it dominates all the other; precise analysis here is difficult. */
2294 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2295 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2298 fprintf (dump_file
, " Set with count ");
2299 max
.dump (dump_file
);
2300 fprintf (dump_file
, " and used with count ");
2301 bb
->count
.dump (dump_file
);
2302 fprintf (dump_file
, " freq %f\n",
2303 max
.to_sreal_scale (bb
->count
).to_double ());
2306 BITMAP_FREE (info
.bb_set
);
2307 if (max
< bb
->count
)
2308 return MAX ((max
.to_sreal_scale (bb
->count
)
2309 * REG_BR_PROB_BASE
).to_int (), 1);
2310 return REG_BR_PROB_BASE
;
2314 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2315 sub-graph and if the predicate the condition depends on is known. If so,
2316 return true and store the pointer the predicate in *P. */
2319 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2320 ipa_fn_summary
*summary
,
2321 class ipa_node_params
*params_summary
,
2324 vec
<ipa_predicate
> nonconstant_names
)
2328 basic_block first_bb
= NULL
;
2331 if (single_pred_p (bb
))
2337 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2339 if (single_succ_p (e
->src
))
2341 if (!single_pred_p (e
->src
))
2344 first_bb
= single_pred (e
->src
);
2345 else if (single_pred (e
->src
) != first_bb
)
2352 else if (e
->src
!= first_bb
)
2360 stmt
= last_stmt (first_bb
);
2362 || gimple_code (stmt
) != GIMPLE_COND
2363 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2366 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2367 gimple_cond_lhs (stmt
),
2375 /* Given a PHI statement in a function described by inline properties SUMMARY
2376 and *P being the predicate describing whether the selected PHI argument is
2377 known, store a predicate for the result of the PHI statement into
2378 NONCONSTANT_NAMES, if possible. */
2381 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2383 vec
<ipa_predicate
> nonconstant_names
)
2387 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2389 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2390 if (!is_gimple_min_invariant (arg
))
2392 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2393 *p
= p
->or_with (summary
->conds
,
2394 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2400 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2402 fprintf (dump_file
, "\t\tphi predicate: ");
2403 p
->dump (dump_file
, summary
->conds
);
2405 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2408 /* For a typical usage of __builtin_expect (a<b, 1), we
2409 may introduce an extra relation stmt:
2410 With the builtin, we have
2413 t3 = __builtin_expect (t2, 1);
2416 Without the builtin, we have
2419 This affects the size/time estimation and may have
2420 an impact on the earlier inlining.
2421 Here find this pattern and fix it up later. */
2424 find_foldable_builtin_expect (basic_block bb
)
2426 gimple_stmt_iterator bsi
;
2428 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2430 gimple
*stmt
= gsi_stmt (bsi
);
2431 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2432 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2433 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2435 tree var
= gimple_call_lhs (stmt
);
2436 tree arg
= gimple_call_arg (stmt
, 0);
2437 use_operand_p use_p
;
2444 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2446 while (TREE_CODE (arg
) == SSA_NAME
)
2448 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2449 if (!is_gimple_assign (stmt_tmp
))
2451 switch (gimple_assign_rhs_code (stmt_tmp
))
2470 arg
= gimple_assign_rhs1 (stmt_tmp
);
2473 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2474 && gimple_code (use_stmt
) == GIMPLE_COND
)
2481 /* Return true when the basic blocks contains only clobbers followed by RESX.
2482 Such BBs are kept around to make removal of dead stores possible with
2483 presence of EH and will be optimized out by optimize_clobbers later in the
2486 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2487 that can be clobber only, too.. When it is false, the RESX is not necessary
2488 on the end of basic block. */
2491 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2493 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2499 if (gsi_end_p (gsi
))
2501 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2505 else if (!single_succ_p (bb
))
2508 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2510 gimple
*stmt
= gsi_stmt (gsi
);
2511 if (is_gimple_debug (stmt
))
2513 if (gimple_clobber_p (stmt
))
2515 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2520 /* See if all predecessors are either throws or clobber only BBs. */
2521 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2522 if (!(e
->flags
& EDGE_EH
)
2523 && !clobber_only_eh_bb_p (e
->src
, false))
2529 /* Return true if STMT compute a floating point expression that may be affected
2530 by -ffast-math and similar flags. */
2533 fp_expression_p (gimple
*stmt
)
2538 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2539 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2544 /* Return true if T references memory location that is local
2545 for the function (that means, dead after return) or read-only. */
2548 refs_local_or_readonly_memory_p (tree t
)
2550 /* Non-escaping memory is fine. */
2551 t
= get_base_address (t
);
2552 if ((TREE_CODE (t
) == MEM_REF
2553 || TREE_CODE (t
) == TARGET_MEM_REF
))
2554 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2556 /* Automatic variables are fine. */
2558 && auto_var_in_fn_p (t
, current_function_decl
))
2561 /* Read-only variables are fine. */
2562 if (DECL_P (t
) && TREE_READONLY (t
))
2568 /* Return true if T is a pointer pointing to memory location that is local
2569 for the function (that means, dead after return) or read-only. */
2572 points_to_local_or_readonly_memory_p (tree t
)
2574 /* See if memory location is clearly invalid. */
2575 if (integer_zerop (t
))
2576 return flag_delete_null_pointer_checks
;
2577 if (TREE_CODE (t
) == SSA_NAME
)
2579 /* For IPA passes we can consinder accesses to return slot local
2580 even if it is not local in the sense that memory is dead by
2581 the end of founction.
2582 The outer function will see a store in the call assignment
2583 and thus this will do right thing for all uses of this
2584 function in the current IPA passes (modref, pure/const discovery
2585 and inlining heuristics). */
2586 if (DECL_RESULT (current_function_decl
)
2587 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl
))
2588 && t
== ssa_default_def (cfun
, DECL_RESULT (current_function_decl
)))
2590 return !ptr_deref_may_alias_global_p (t
);
2592 if (TREE_CODE (t
) == ADDR_EXPR
)
2593 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2598 /* Analyze function body for NODE.
2599 EARLY indicates run from early optimization pipeline. */
2602 analyze_function_body (struct cgraph_node
*node
, bool early
)
2604 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2605 /* Estimate static overhead for function prologue/epilogue and alignment. */
2606 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2607 /* Benefits are scaled by probability of elimination that is in range
2610 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2612 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2613 ipa_node_params
*params_summary
2614 = early
? NULL
: ipa_node_params_sum
->get (node
);
2615 ipa_predicate bb_predicate
;
2616 struct ipa_func_body_info fbi
;
2617 vec
<ipa_predicate
> nonconstant_names
= vNULL
;
2620 gimple
*fix_builtin_expect_stmt
;
2622 gcc_assert (my_function
&& my_function
->cfg
);
2623 gcc_assert (cfun
== my_function
);
2625 memset(&fbi
, 0, sizeof(fbi
));
2626 vec_free (info
->conds
);
2628 info
->size_time_table
.release ();
2629 info
->call_size_time_table
.release ();
2631 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2632 so we can produce proper inline hints.
2634 When optimizing and analyzing for early inliner, initialize node params
2635 so we can produce correct BB predicates. */
2637 if (opt_for_fn (node
->decl
, optimize
))
2639 calculate_dominance_info (CDI_DOMINATORS
);
2640 calculate_dominance_info (CDI_POST_DOMINATORS
);
2642 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2645 ipa_check_create_node_params ();
2646 ipa_initialize_node_params (node
);
2649 if (ipa_node_params_sum
)
2652 fbi
.info
= ipa_node_params_sum
->get (node
);
2653 fbi
.bb_infos
= vNULL
;
2654 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2655 fbi
.param_count
= count_formal_params (node
->decl
);
2656 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2658 nonconstant_names
.safe_grow_cleared
2659 (SSANAMES (my_function
)->length (), true);
2664 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2665 node
->dump_name ());
2667 /* When we run into maximal number of entries, we assign everything to the
2668 constant truth case. Be sure to have it in list. */
2669 bb_predicate
= true;
2670 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2672 bb_predicate
= ipa_predicate::not_inlined ();
2673 info
->account_size_time (opt_for_fn (node
->decl
,
2674 param_uninlined_function_insns
)
2675 * ipa_fn_summary::size_scale
,
2676 opt_for_fn (node
->decl
,
2677 param_uninlined_function_time
),
2681 /* Only look for target information for inlinable functions. */
2682 bool scan_for_target_info
=
2684 && targetm
.target_option
.need_ipa_fn_target_info (node
->decl
,
2688 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2689 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2690 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2691 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2692 for (n
= 0; n
< nblocks
; n
++)
2694 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2695 freq
= bb
->count
.to_sreal_scale (entry_count
);
2696 if (clobber_only_eh_bb_p (bb
))
2698 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2699 fprintf (dump_file
, "\n Ignoring BB %i;"
2700 " it will be optimized away by cleanup_clobbers\n",
2705 /* TODO: Obviously predicates can be propagated down across CFG. */
2709 bb_predicate
= *(ipa_predicate
*)bb
->aux
;
2711 bb_predicate
= false;
2714 bb_predicate
= true;
2716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2718 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2719 bb_predicate
.dump (dump_file
, info
->conds
);
2722 if (fbi
.info
&& nonconstant_names
.exists ())
2724 ipa_predicate phi_predicate
;
2725 bool first_phi
= true;
2727 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2731 && !phi_result_unknown_predicate (&fbi
, info
,
2738 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2740 fprintf (dump_file
, " ");
2741 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2743 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2748 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2750 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2751 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2753 gimple
*stmt
= gsi_stmt (bsi
);
2754 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2755 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2757 ipa_predicate will_be_nonconstant
;
2759 /* This relation stmt should be folded after we remove
2760 __builtin_expect call. Adjust the cost here. */
2761 if (stmt
== fix_builtin_expect_stmt
)
2767 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2769 fprintf (dump_file
, " ");
2770 print_gimple_stmt (dump_file
, stmt
, 0);
2771 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2772 freq
.to_double (), this_size
,
2776 if (is_gimple_call (stmt
)
2777 && !gimple_call_internal_p (stmt
))
2779 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2780 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2782 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2783 resolved as constant. We however don't want to optimize
2784 out the cgraph edges. */
2785 if (nonconstant_names
.exists ()
2786 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2787 && gimple_call_lhs (stmt
)
2788 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2790 ipa_predicate false_p
= false;
2791 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2794 if (ipa_node_params_sum
)
2796 int count
= gimple_call_num_args (stmt
);
2800 es
->param
.safe_grow_cleared (count
, true);
2801 for (i
= 0; i
< count
; i
++)
2803 int prob
= param_change_prob (&fbi
, stmt
, i
);
2804 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2805 es
->param
[i
].change_prob
= prob
;
2806 es
->param
[i
].points_to_local_or_readonly_memory
2807 = points_to_local_or_readonly_memory_p
2808 (gimple_call_arg (stmt
, i
));
2811 /* We cannot setup VLA parameters during inlining. */
2812 for (unsigned int i
= 0; i
< gimple_call_num_args (stmt
); ++i
)
2813 if (TREE_CODE (gimple_call_arg (stmt
, i
)) == WITH_SIZE_EXPR
)
2815 edge
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
2818 es
->call_stmt_size
= this_size
;
2819 es
->call_stmt_time
= this_time
;
2820 es
->loop_depth
= bb_loop_depth (bb
);
2821 edge_set_predicate (edge
, &bb_predicate
);
2822 if (edge
->speculative
)
2824 cgraph_edge
*indirect
2825 = edge
->speculative_call_indirect_edge ();
2826 ipa_call_summary
*es2
2827 = ipa_call_summaries
->get_create (indirect
);
2828 ipa_call_summaries
->duplicate (edge
, indirect
,
2831 /* Edge is the first direct call.
2832 create and duplicate call summaries for multiple
2833 speculative call targets. */
2834 for (cgraph_edge
*direct
2835 = edge
->next_speculative_call_target ();
2837 direct
= direct
->next_speculative_call_target ())
2839 ipa_call_summary
*es3
2840 = ipa_call_summaries
->get_create (direct
);
2841 ipa_call_summaries
->duplicate (edge
, direct
,
2847 /* TODO: When conditional jump or switch is known to be constant, but
2848 we did not translate it into the predicates, we really can account
2849 just maximum of the possible paths. */
2852 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2853 stmt
, nonconstant_names
);
2855 will_be_nonconstant
= true;
2856 if (this_time
|| this_size
)
2858 sreal final_time
= (sreal
)this_time
* freq
;
2860 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2861 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2863 "\t\t50%% will be eliminated by inlining\n");
2864 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2865 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2867 ipa_predicate p
= bb_predicate
& will_be_nonconstant
;
2869 /* We can ignore statement when we proved it is never going
2870 to happen, but we cannot do that for call statements
2871 because edges are accounted specially. */
2873 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2879 /* We account everything but the calls. Calls have their own
2880 size/time info attached to cgraph edges. This is necessary
2881 in order to make the cost disappear after inlining. */
2882 if (!is_gimple_call (stmt
))
2887 = bb_predicate
& ipa_predicate::not_inlined ();
2888 info
->account_size_time (this_size
* prob
,
2889 (final_time
* prob
) / 2, ip
,
2893 info
->account_size_time (this_size
* (2 - prob
),
2894 (final_time
* (2 - prob
) / 2),
2899 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2901 info
->fp_expressions
= true;
2903 fprintf (dump_file
, " fp_expression set\n");
2907 /* For target specific information, we want to scan all statements
2908 rather than those statements with non-zero weights, to avoid
2909 missing to scan something interesting for target information,
2910 such as: internal function calls. */
2911 if (scan_for_target_info
)
2912 scan_for_target_info
=
2913 targetm
.target_option
.update_ipa_fn_target_info
2914 (info
->target_info
, stmt
);
2916 /* Account cost of address calculations in the statements. */
2917 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2919 for (tree op
= gimple_op (stmt
, i
);
2920 op
&& handled_component_p (op
);
2921 op
= TREE_OPERAND (op
, 0))
2922 if ((TREE_CODE (op
) == ARRAY_REF
2923 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2924 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2926 ipa_predicate p
= bb_predicate
;
2928 p
= p
& will_be_nonconstant_expr_predicate
2929 (&fbi
, info
, params_summary
,
2930 TREE_OPERAND (op
, 1),
2938 "\t\tAccounting address calculation.\n");
2939 info
->account_size_time (ipa_fn_summary::size_scale
,
2951 if (nonconstant_names
.exists () && !early
)
2953 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2954 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
2955 param_ipa_max_loop_predicates
);
2957 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2958 flow_loops_dump (dump_file
, NULL
, 0);
2960 for (auto loop
: loops_list (cfun
, 0))
2962 ipa_predicate loop_iterations
= true;
2966 class tree_niter_desc niter_desc
;
2967 if (!loop
->header
->aux
)
2970 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
2971 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
2973 bb_predicate
= *(ipa_predicate
*)loop
->header
->aux
;
2974 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
2975 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2976 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2977 && !is_gimple_min_invariant (niter_desc
.niter
))
2979 ipa_predicate will_be_nonconstant
2980 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2984 if (will_be_nonconstant
!= true)
2985 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2986 if (will_be_nonconstant
!= true
2987 && will_be_nonconstant
!= false)
2988 loop_iterations
&= will_be_nonconstant
;
2990 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
2991 phdr_freq
, max_loop_predicates
);
2994 /* To avoid quadratic behavior we analyze stride predicates only
2995 with respect to the containing loop. Thus we simply iterate
2996 over all defs in the outermost loop body. */
2997 for (class loop
*loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2998 loop
!= NULL
; loop
= loop
->next
)
3000 ipa_predicate loop_stride
= true;
3001 basic_block
*body
= get_loop_body (loop
);
3002 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
3003 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
3004 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
3006 gimple_stmt_iterator gsi
;
3010 bb_predicate
= *(ipa_predicate
*)body
[i
]->aux
;
3011 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
3014 gimple
*stmt
= gsi_stmt (gsi
);
3016 if (!is_gimple_assign (stmt
))
3019 tree def
= gimple_assign_lhs (stmt
);
3020 if (TREE_CODE (def
) != SSA_NAME
)
3024 if (!simple_iv (loop_containing_stmt (stmt
),
3025 loop_containing_stmt (stmt
),
3027 || is_gimple_min_invariant (iv
.step
))
3030 ipa_predicate will_be_nonconstant
3031 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3035 if (will_be_nonconstant
!= true)
3036 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3037 if (will_be_nonconstant
!= true
3038 && will_be_nonconstant
!= false)
3039 loop_stride
= loop_stride
& will_be_nonconstant
;
3042 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
3043 phdr_freq
, max_loop_predicates
);
3048 FOR_ALL_BB_FN (bb
, my_function
)
3054 edge_predicate_pool
.remove ((ipa_predicate
*)bb
->aux
);
3056 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3059 edge_predicate_pool
.remove ((ipa_predicate
*)e
->aux
);
3063 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3064 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3066 ss
->self_size
= size
;
3067 nonconstant_names
.release ();
3068 ipa_release_body_info (&fbi
);
3069 if (opt_for_fn (node
->decl
, optimize
))
3072 loop_optimizer_finalize ();
3073 else if (!ipa_edge_args_sum
)
3074 ipa_free_all_node_params ();
3075 free_dominance_info (CDI_DOMINATORS
);
3076 free_dominance_info (CDI_POST_DOMINATORS
);
3080 fprintf (dump_file
, "\n");
3081 ipa_dump_fn_summary (dump_file
, node
);
3086 /* Compute function summary.
3087 EARLY is true when we compute parameters during early opts. */
3090 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3092 HOST_WIDE_INT self_stack_size
;
3093 struct cgraph_edge
*e
;
3095 gcc_assert (!node
->inlined_to
);
3097 if (!ipa_fn_summaries
)
3098 ipa_fn_summary_alloc ();
3100 /* Create a new ipa_fn_summary. */
3101 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3102 ipa_fn_summaries
->remove (node
);
3103 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3104 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3106 /* Estimate the stack size for the function if we're optimizing. */
3107 self_stack_size
= optimize
&& !node
->thunk
3108 ? estimated_stack_frame_size (node
) : 0;
3109 size_info
->estimated_self_stack_size
= self_stack_size
;
3110 info
->estimated_stack_size
= self_stack_size
;
3114 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3115 ipa_predicate t
= true;
3117 node
->can_change_signature
= false;
3118 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3119 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3120 info
->account_size_time (ipa_fn_summary::size_scale
3121 * opt_for_fn (node
->decl
,
3122 param_uninlined_function_thunk_insns
),
3123 opt_for_fn (node
->decl
,
3124 param_uninlined_function_thunk_time
), t
, t
);
3125 t
= ipa_predicate::not_inlined ();
3126 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3127 ipa_update_overall_fn_summary (node
);
3128 size_info
->self_size
= size_info
->size
;
3129 if (stdarg_p (TREE_TYPE (node
->decl
)))
3131 info
->inlinable
= false;
3132 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3135 info
->inlinable
= true;
3139 /* Even is_gimple_min_invariant rely on current_function_decl. */
3140 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3142 /* During IPA profile merging we may be called w/o virtual SSA form
3144 update_ssa (TODO_update_ssa_only_virtuals
);
3146 /* Can this function be inlined at all? */
3147 if (!opt_for_fn (node
->decl
, optimize
)
3148 && !lookup_attribute ("always_inline",
3149 DECL_ATTRIBUTES (node
->decl
)))
3150 info
->inlinable
= false;
3152 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3154 bool no_signature
= false;
3155 /* Type attributes can use parameter indices to describe them.
3156 Special case fn spec since we can safely preserve them in
3157 modref summaries. */
3158 for (tree list
= TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
));
3159 list
&& !no_signature
; list
= TREE_CHAIN (list
))
3160 if (!ipa_param_adjustments::type_attribute_allowed_p
3161 (get_attribute_name (list
)))
3165 fprintf (dump_file
, "No signature change:"
3166 " function type has unhandled attribute %s.\n",
3167 IDENTIFIER_POINTER (get_attribute_name (list
)));
3169 no_signature
= true;
3171 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
3172 parm
&& !no_signature
; parm
= DECL_CHAIN (parm
))
3173 if (variably_modified_type_p (TREE_TYPE (parm
), node
->decl
))
3177 fprintf (dump_file
, "No signature change:"
3178 " has parameter with variably modified type.\n");
3180 no_signature
= true;
3183 /* Likewise for #pragma omp declare simd functions or functions
3184 with simd attribute. */
3186 || lookup_attribute ("omp declare simd",
3187 DECL_ATTRIBUTES (node
->decl
)))
3188 node
->can_change_signature
= false;
3191 /* Otherwise, inlinable functions always can change signature. */
3192 if (info
->inlinable
)
3193 node
->can_change_signature
= true;
3196 /* Functions calling builtin_apply cannot change signature. */
3197 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3199 tree
cdecl = e
->callee
->decl
;
3200 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
3201 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
3204 node
->can_change_signature
= !e
;
3207 analyze_function_body (node
, early
);
3211 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3212 size_info
->size
= size_info
->self_size
;
3213 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3215 /* Code above should compute exactly the same result as
3216 ipa_update_overall_fn_summary except for case when speculative
3217 edges are present since these are accounted to size but not
3218 self_size. Do not compare time since different order the roundoff
3219 errors result in slight changes. */
3220 ipa_update_overall_fn_summary (node
);
3223 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3226 gcc_assert (e
|| size_info
->size
== size_info
->self_size
);
3231 /* Compute parameters of functions used by inliner using
3232 current_function_decl. */
3235 compute_fn_summary_for_current (void)
3237 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3241 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3242 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3243 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3246 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3247 int *size
, int *time
,
3248 ipa_call_arg_values
*avals
)
3251 struct cgraph_node
*callee
;
3252 class ipa_fn_summary
*isummary
;
3253 enum availability avail
;
3257 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3259 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3262 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3263 if (!target
|| speculative
)
3266 /* Account for difference in cost between indirect and direct calls. */
3267 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3268 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3269 gcc_checking_assert (*time
>= 0);
3270 gcc_checking_assert (*size
>= 0);
3272 callee
= cgraph_node::get (target
);
3273 if (!callee
|| !callee
->definition
)
3275 callee
= callee
->function_symbol (&avail
);
3276 if (avail
< AVAIL_AVAILABLE
)
3278 isummary
= ipa_fn_summaries
->get (callee
);
3279 if (isummary
== NULL
)
3282 return isummary
->inlinable
;
3285 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3286 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3287 devirtualized. AVALS, if non-NULL, describes the context of the call site
3288 as far as values of parameters are concerened. */
3291 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3292 sreal
*time
, ipa_call_arg_values
*avals
,
3295 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3296 int call_size
= es
->call_stmt_size
;
3297 int call_time
= es
->call_stmt_time
;
3300 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3301 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3302 *hints
|= INLINE_HINT_indirect_call
;
3303 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3306 *min_size
+= cur_size
;
3308 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3312 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3313 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3316 Helper for estimate_calls_size_and_time which does the same but
3317 (in most cases) faster. */
3320 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3321 int *min_size
, sreal
*time
,
3323 clause_t possible_truths
,
3324 ipa_call_arg_values
*avals
)
3326 struct cgraph_edge
*e
;
3327 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3329 if (!e
->inline_failed
)
3331 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3332 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3333 hints
, possible_truths
, avals
);
3337 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3339 /* Do not care about zero sized builtins. */
3340 if (!es
->call_stmt_size
)
3342 gcc_checking_assert (!es
->call_stmt_time
);
3346 || es
->predicate
->evaluate (possible_truths
))
3348 /* Predicates of calls shall not use NOT_CHANGED codes,
3349 so we do not need to compute probabilities. */
3350 estimate_edge_size_and_time (e
, size
,
3351 es
->predicate
? NULL
: min_size
,
3352 time
, avals
, hints
);
3355 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3357 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3359 || es
->predicate
->evaluate (possible_truths
))
3360 estimate_edge_size_and_time (e
, size
,
3361 es
->predicate
? NULL
: min_size
,
3362 time
, avals
, hints
);
3366 /* Populate sum->call_size_time_table for edges from NODE. */
3369 summarize_calls_size_and_time (struct cgraph_node
*node
,
3370 ipa_fn_summary
*sum
)
3372 struct cgraph_edge
*e
;
3373 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3375 if (!e
->inline_failed
)
3377 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3378 summarize_calls_size_and_time (e
->callee
, sum
);
3384 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3386 ipa_predicate pred
= true;
3387 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3390 pred
= *es
->predicate
;
3391 sum
->account_size_time (size
, time
, pred
, pred
, true);
3393 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3398 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3399 ipa_predicate pred
= true;
3400 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3403 pred
= *es
->predicate
;
3404 sum
->account_size_time (size
, time
, pred
, pred
, true);
3408 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3409 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3410 context of the call site. */
3413 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3414 int *min_size
, sreal
*time
,
3416 clause_t possible_truths
,
3417 ipa_call_arg_values
*avals
)
3419 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3420 bool use_table
= true;
3422 gcc_assert (node
->callees
|| node
->indirect_calls
);
3424 /* During early inlining we do not calculate info for very
3425 large functions and thus there is no need for producing
3427 if (!ipa_node_params_sum
)
3429 /* Do not calculate summaries for simple wrappers; it is waste
3431 else if (node
->callees
&& node
->indirect_calls
3432 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3434 /* If there is an indirect edge that may be optimized, we need
3435 to go the slow way. */
3436 else if (avals
&& hints
3437 && (avals
->m_known_vals
.length ()
3438 || avals
->m_known_contexts
.length ()
3439 || avals
->m_known_aggs
.length ()))
3441 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (node
);
3442 unsigned int nargs
= params_summary
3443 ? ipa_get_param_count (params_summary
) : 0;
3445 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3447 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3448 && (avals
->safe_sval_at (i
)
3449 || (avals
->m_known_aggs
.length () > i
3450 && avals
->m_known_aggs
[i
].items
.length ())))
3452 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3453 && (avals
->m_known_contexts
.length () > i
3454 && !avals
->m_known_contexts
[i
].useless_p ()))
3459 /* Fast path is via the call size time table. */
3462 /* Build summary if it is absent. */
3463 if (!sum
->call_size_time_table
.length ())
3465 ipa_predicate true_pred
= true;
3466 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3467 summarize_calls_size_and_time (node
, sum
);
3470 int old_size
= *size
;
3471 sreal old_time
= time
? *time
: 0;
3474 *min_size
+= sum
->call_size_time_table
[0].size
;
3479 /* Walk the table and account sizes and times. */
3480 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3482 if (e
->exec_predicate
.evaluate (possible_truths
))
3489 /* Be careful and see if both methods agree. */
3490 if ((flag_checking
|| dump_file
)
3491 /* Do not try to sanity check when we know we lost some
3493 && sum
->call_size_time_table
.length ()
3494 < ipa_fn_summary::max_size_time_table_size
)
3496 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3497 possible_truths
, avals
);
3498 gcc_assert (*size
== old_size
);
3499 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3501 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3502 old_time
.to_double (),
3503 time
->to_double ());
3506 /* Slow path by walking all edges. */
3508 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3509 possible_truths
, avals
);
3512 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3513 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3516 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3517 clause_t nonspec_possible_truths
,
3518 vec
<inline_param_summary
>
3519 inline_param_summary
,
3520 ipa_auto_call_arg_values
*arg_values
)
3521 : m_node (node
), m_possible_truths (possible_truths
),
3522 m_nonspec_possible_truths (nonspec_possible_truths
),
3523 m_inline_param_summary (inline_param_summary
),
3524 m_avals (arg_values
)
3528 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3531 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3533 m_node
= ctx
.m_node
;
3534 m_possible_truths
= ctx
.m_possible_truths
;
3535 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3536 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3537 unsigned int nargs
= params_summary
3538 ? ipa_get_param_count (params_summary
) : 0;
3540 m_inline_param_summary
= vNULL
;
3541 /* Copy the info only if there is at least one useful entry. */
3542 if (ctx
.m_inline_param_summary
.exists ())
3544 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3546 for (unsigned int i
= 0; i
< n
; i
++)
3547 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3548 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3550 m_inline_param_summary
3551 = ctx
.m_inline_param_summary
.copy ();
3555 m_avals
.m_known_vals
= vNULL
;
3556 if (ctx
.m_avals
.m_known_vals
.exists ())
3558 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3560 for (unsigned int i
= 0; i
< n
; i
++)
3561 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3562 && ctx
.m_avals
.m_known_vals
[i
])
3564 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3569 m_avals
.m_known_contexts
= vNULL
;
3570 if (ctx
.m_avals
.m_known_contexts
.exists ())
3572 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3574 for (unsigned int i
= 0; i
< n
; i
++)
3575 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3576 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3578 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3583 m_avals
.m_known_aggs
= vNULL
;
3584 if (ctx
.m_avals
.m_known_aggs
.exists ())
3586 unsigned int n
= MIN (ctx
.m_avals
.m_known_aggs
.length (), nargs
);
3588 for (unsigned int i
= 0; i
< n
; i
++)
3589 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3590 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3592 m_avals
.m_known_aggs
3593 = ipa_copy_agg_values (ctx
.m_avals
.m_known_aggs
);
3598 m_avals
.m_known_value_ranges
= vNULL
;
3601 /* Release memory used by known_vals/contexts/aggs vectors. and
3602 inline_param_summary. */
3605 ipa_cached_call_context::release ()
3607 /* See if context is initialized at first place. */
3610 ipa_release_agg_values (m_avals
.m_known_aggs
, true);
3611 m_avals
.m_known_vals
.release ();
3612 m_avals
.m_known_contexts
.release ();
3613 m_inline_param_summary
.release ();
3616 /* Return true if CTX describes the same call context as THIS. */
3619 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3621 if (m_node
!= ctx
.m_node
3622 || m_possible_truths
!= ctx
.m_possible_truths
3623 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3626 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3627 unsigned int nargs
= params_summary
3628 ? ipa_get_param_count (params_summary
) : 0;
3630 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3632 for (unsigned int i
= 0; i
< nargs
; i
++)
3634 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3636 if (i
>= m_inline_param_summary
.length ()
3637 || m_inline_param_summary
[i
].useless_p ())
3639 if (i
< ctx
.m_inline_param_summary
.length ()
3640 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3644 if (i
>= ctx
.m_inline_param_summary
.length ()
3645 || ctx
.m_inline_param_summary
[i
].useless_p ())
3647 if (i
< m_inline_param_summary
.length ()
3648 && !m_inline_param_summary
[i
].useless_p ())
3652 if (!m_inline_param_summary
[i
].equal_to
3653 (ctx
.m_inline_param_summary
[i
]))
3657 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3659 for (unsigned int i
= 0; i
< nargs
; i
++)
3661 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3663 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3665 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3666 && ctx
.m_avals
.m_known_vals
[i
])
3670 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3671 || !ctx
.m_avals
.m_known_vals
[i
])
3673 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3677 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3681 if (m_avals
.m_known_contexts
.exists ()
3682 || ctx
.m_avals
.m_known_contexts
.exists ())
3684 for (unsigned int i
= 0; i
< nargs
; i
++)
3686 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3688 if (i
>= m_avals
.m_known_contexts
.length ()
3689 || m_avals
.m_known_contexts
[i
].useless_p ())
3691 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3692 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3696 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3697 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3699 if (i
< m_avals
.m_known_contexts
.length ()
3700 && !m_avals
.m_known_contexts
[i
].useless_p ())
3704 if (!m_avals
.m_known_contexts
[i
].equal_to
3705 (ctx
.m_avals
.m_known_contexts
[i
]))
3709 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3711 for (unsigned int i
= 0; i
< nargs
; i
++)
3713 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3715 if (i
>= m_avals
.m_known_aggs
.length ()
3716 || m_avals
.m_known_aggs
[i
].is_empty ())
3718 if (i
< ctx
.m_avals
.m_known_aggs
.length ()
3719 && !ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3723 if (i
>= ctx
.m_avals
.m_known_aggs
.length ()
3724 || ctx
.m_avals
.m_known_aggs
[i
].is_empty ())
3726 if (i
< m_avals
.m_known_aggs
.length ()
3727 && !m_avals
.m_known_aggs
[i
].is_empty ())
3731 if (!m_avals
.m_known_aggs
[i
].equal_to (ctx
.m_avals
.m_known_aggs
[i
]))
3738 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3739 this context. Always compute size and min_size. Only compute time and
3740 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3744 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3745 bool est_times
, bool est_hints
)
3747 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3752 ipa_hints hints
= 0;
3753 sreal loops_with_known_iterations
= 0;
3754 sreal loops_with_known_strides
= 0;
3757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3760 fprintf (dump_file
, " Estimating body: %s\n"
3761 " Known to be false: ", m_node
->dump_name ());
3763 for (i
= ipa_predicate::not_inlined_condition
;
3764 i
< (ipa_predicate::first_dynamic_condition
3765 + (int) vec_safe_length (info
->conds
)); i
++)
3766 if (!(m_possible_truths
& (1 << i
)))
3769 fprintf (dump_file
, ", ");
3771 dump_condition (dump_file
, info
->conds
, i
);
3775 if (m_node
->callees
|| m_node
->indirect_calls
)
3776 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3777 est_times
? &time
: NULL
,
3778 est_hints
? &hints
: NULL
, m_possible_truths
,
3781 sreal nonspecialized_time
= time
;
3783 min_size
+= info
->size_time_table
[0].size
;
3784 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3786 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3788 /* Because predicates are conservative, it can happen that nonconst is 1
3792 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3794 gcc_checking_assert (e
->time
>= 0);
3795 gcc_checking_assert (time
>= 0);
3797 /* We compute specialized size only because size of nonspecialized
3798 copy is context independent.
3800 The difference between nonspecialized execution and specialized is
3801 that nonspecialized is not going to have optimized out computations
3802 known to be constant in a specialized setting. */
3807 nonspecialized_time
+= e
->time
;
3810 else if (!m_inline_param_summary
.exists ())
3817 int prob
= e
->nonconst_predicate
.probability
3818 (info
->conds
, m_possible_truths
,
3819 m_inline_param_summary
);
3820 gcc_checking_assert (prob
>= 0);
3821 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3822 if (prob
== REG_BR_PROB_BASE
)
3825 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3827 gcc_checking_assert (time
>= 0);
3830 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3831 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3832 gcc_checking_assert (min_size
>= 0);
3833 gcc_checking_assert (size
>= 0);
3834 gcc_checking_assert (time
>= 0);
3835 /* nonspecialized_time should be always bigger than specialized time.
3836 Roundoff issues however may get into the way. */
3837 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3839 /* Roundoff issues may make specialized time bigger than nonspecialized
3840 time. We do not really want that to happen because some heuristics
3841 may get confused by seeing negative speedups. */
3842 if (time
> nonspecialized_time
)
3843 time
= nonspecialized_time
;
3848 hints
|= INLINE_HINT_in_scc
;
3849 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3850 hints
|= INLINE_HINT_declared_inline
;
3851 if (info
->builtin_constant_p_parms
.length ()
3852 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3853 hints
|= INLINE_HINT_builtin_constant_p
;
3855 ipa_freqcounting_predicate
*fcp
;
3856 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3857 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3859 hints
|= INLINE_HINT_loop_iterations
;
3860 loops_with_known_iterations
+= fcp
->freq
;
3862 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3864 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3865 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3867 hints
|= INLINE_HINT_loop_stride
;
3868 loops_with_known_strides
+= fcp
->freq
;
3870 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3873 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3874 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3876 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3878 fprintf (dump_file
, "\n size:%i", (int) size
);
3880 fprintf (dump_file
, " time:%f nonspec time:%f",
3881 time
.to_double (), nonspecialized_time
.to_double ());
3883 fprintf (dump_file
, " loops with known iterations:%f "
3884 "known strides:%f", loops_with_known_iterations
.to_double (),
3885 loops_with_known_strides
.to_double ());
3886 fprintf (dump_file
, "\n");
3890 estimates
->time
= time
;
3891 estimates
->nonspecialized_time
= nonspecialized_time
;
3893 estimates
->size
= size
;
3894 estimates
->min_size
= min_size
;
3896 estimates
->hints
= hints
;
3901 /* Estimate size and time needed to execute callee of EDGE assuming that
3902 parameters known to be constant at caller of EDGE are propagated.
3903 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3904 and types for parameters. */
3907 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3908 ipa_auto_call_arg_values
*avals
,
3909 ipa_call_estimates
*estimates
)
3911 clause_t clause
, nonspec_clause
;
3913 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
3915 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
3916 ctx
.estimate_size_and_time (estimates
);
3919 /* Return stack frame offset where frame of NODE is supposed to start inside
3920 of the function it is inlined to.
3921 Return 0 for functions that are not inlined. */
3924 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3926 HOST_WIDE_INT offset
= 0;
3927 if (!node
->inlined_to
)
3929 node
= node
->callers
->caller
;
3932 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3933 if (!node
->inlined_to
)
3935 node
= node
->callers
->caller
;
3940 /* Update summary information of inline clones after inlining.
3941 Compute peak stack usage. */
3944 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3946 struct cgraph_edge
*e
;
3948 ipa_propagate_frequency (node
);
3949 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3951 if (!e
->inline_failed
)
3952 inline_update_callee_summaries (e
->callee
, depth
);
3954 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3956 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3957 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3960 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
3961 INLINED_EDGE has been inlined.
3963 When function A is inlined in B and A calls C with parameter that
3964 changes with probability PROB1 and C is known to be passthrough
3965 of argument if B that change with probability PROB2, the probability
3966 of change is now PROB1*PROB2. */
3969 remap_edge_params (struct cgraph_edge
*inlined_edge
,
3970 struct cgraph_edge
*edge
)
3972 if (ipa_node_params_sum
)
3975 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
3978 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3979 class ipa_call_summary
*inlined_es
3980 = ipa_call_summaries
->get (inlined_edge
);
3982 if (es
->param
.length () == 0)
3985 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3987 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3988 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3989 || jfunc
->type
== IPA_JF_ANCESTOR
)
3991 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3992 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3993 : ipa_get_jf_ancestor_formal_id (jfunc
);
3994 if (id
< (int) inlined_es
->param
.length ())
3996 int prob1
= es
->param
[i
].change_prob
;
3997 int prob2
= inlined_es
->param
[id
].change_prob
;
3998 int prob
= combine_probabilities (prob1
, prob2
);
4000 if (prob1
&& prob2
&& !prob
)
4003 es
->param
[i
].change_prob
= prob
;
4006 ->param
[id
].points_to_local_or_readonly_memory
)
4007 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4009 if (!es
->param
[i
].points_to_local_or_readonly_memory
4010 && jfunc
->type
== IPA_JF_CONST
4011 && points_to_local_or_readonly_memory_p
4012 (ipa_get_jf_constant (jfunc
)))
4013 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4019 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
4021 Remap predicates of callees of NODE. Rest of arguments match
4024 Also update change probabilities. */
4027 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
4028 struct cgraph_node
*node
,
4029 class ipa_fn_summary
*info
,
4030 class ipa_node_params
*params_summary
,
4031 class ipa_fn_summary
*callee_info
,
4032 const vec
<int> &operand_map
,
4033 const vec
<HOST_WIDE_INT
> &offset_map
,
4034 clause_t possible_truths
,
4035 ipa_predicate
*toplev_predicate
)
4037 struct cgraph_edge
*e
, *next
;
4038 for (e
= node
->callees
; e
; e
= next
)
4041 next
= e
->next_callee
;
4043 if (e
->inline_failed
)
4045 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4046 remap_edge_params (inlined_edge
, e
);
4050 p
= es
->predicate
->remap_after_inlining
4051 (info
, params_summary
,
4052 callee_info
, operand_map
,
4053 offset_map
, possible_truths
,
4055 edge_set_predicate (e
, &p
);
4058 edge_set_predicate (e
, toplev_predicate
);
4061 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
4062 params_summary
, callee_info
,
4063 operand_map
, offset_map
, possible_truths
,
4066 for (e
= node
->indirect_calls
; e
; e
= next
)
4068 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4070 next
= e
->next_callee
;
4072 remap_edge_params (inlined_edge
, e
);
4075 p
= es
->predicate
->remap_after_inlining
4076 (info
, params_summary
,
4077 callee_info
, operand_map
, offset_map
,
4078 possible_truths
, *toplev_predicate
);
4079 edge_set_predicate (e
, &p
);
4082 edge_set_predicate (e
, toplev_predicate
);
4086 /* Run remap_after_inlining on each predicate in V. */
4089 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4090 class ipa_node_params
*params_summary
,
4091 class ipa_fn_summary
*callee_info
,
4092 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4093 const vec
<int> &operand_map
,
4094 const vec
<HOST_WIDE_INT
> &offset_map
,
4095 clause_t possible_truths
,
4096 ipa_predicate
*toplev_predicate
)
4099 ipa_freqcounting_predicate
*fcp
;
4100 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4103 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4104 callee_info
, operand_map
,
4105 offset_map
, possible_truths
,
4107 if (p
!= false && p
!= true)
4108 *fcp
->predicate
&= p
;
4112 /* We inlined EDGE. Update summary of the function we inlined into. */
4115 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4117 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4118 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4119 ? edge
->caller
->inlined_to
: edge
->caller
);
4120 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4121 clause_t clause
= 0; /* not_inline is known to be false. */
4123 auto_vec
<int, 8> operand_map
;
4124 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4126 ipa_predicate toplev_predicate
;
4127 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4128 ipa_node_params
*params_summary
= (ipa_node_params_sum
4129 ? ipa_node_params_sum
->get (to
) : NULL
);
4132 toplev_predicate
= *es
->predicate
;
4134 toplev_predicate
= true;
4136 info
->fp_expressions
|= callee_info
->fp_expressions
;
4137 info
->target_info
|= callee_info
->target_info
;
4139 if (callee_info
->conds
)
4141 ipa_auto_call_arg_values avals
;
4142 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4144 if (ipa_node_params_sum
&& callee_info
->conds
)
4146 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4147 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4152 operand_map
.safe_grow_cleared (count
, true);
4153 offset_map
.safe_grow_cleared (count
, true);
4155 for (i
= 0; i
< count
; i
++)
4157 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4160 /* TODO: handle non-NOPs when merging. */
4161 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4163 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4164 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4165 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4168 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4170 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4171 if (offset
>= 0 && offset
< INT_MAX
)
4173 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4174 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4176 offset_map
[i
] = offset
;
4179 operand_map
[i
] = map
;
4180 gcc_assert (map
< ipa_get_param_count (params_summary
));
4184 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4185 if (ip
< count
&& operand_map
[ip
] >= 0)
4186 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4188 sreal freq
= edge
->sreal_frequency ();
4189 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4192 p
= e
->exec_predicate
.remap_after_inlining
4193 (info
, params_summary
,
4194 callee_info
, operand_map
,
4197 ipa_predicate nonconstp
;
4198 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4199 (info
, params_summary
,
4200 callee_info
, operand_map
,
4203 if (p
!= false && nonconstp
!= false)
4205 sreal add_time
= ((sreal
)e
->time
* freq
);
4206 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4208 if (prob
!= REG_BR_PROB_BASE
)
4209 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4210 if (prob
!= REG_BR_PROB_BASE
4211 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4213 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4214 (double) prob
/ REG_BR_PROB_BASE
);
4216 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4219 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4220 callee_info
, operand_map
,
4221 offset_map
, clause
, &toplev_predicate
);
4222 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4223 info
->loop_iterations
, operand_map
,
4224 offset_map
, clause
, &toplev_predicate
);
4225 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4226 info
->loop_strides
, operand_map
,
4227 offset_map
, clause
, &toplev_predicate
);
4229 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4230 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4232 if (info
->estimated_stack_size
< peak
)
4233 info
->estimated_stack_size
= peak
;
4235 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4236 if (info
->call_size_time_table
.length ())
4239 sreal edge_time
= 0;
4241 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4242 /* Unaccount size and time of the optimized out call. */
4243 info
->account_size_time (-edge_size
, -edge_time
,
4244 es
->predicate
? *es
->predicate
: true,
4245 es
->predicate
? *es
->predicate
: true,
4247 /* Account new calls. */
4248 summarize_calls_size_and_time (edge
->callee
, info
);
4251 /* Free summaries that are not maintained for inline clones/edges. */
4252 ipa_call_summaries
->remove (edge
);
4253 ipa_fn_summaries
->remove (edge
->callee
);
4254 ipa_remove_from_growth_caches (edge
);
4257 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4258 overall size and time. Recompute it.
4259 If RESET is true also recompute call_time_size_table. */
4262 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4264 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4265 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4269 size_info
->size
= 0;
4271 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4273 size_info
->size
+= e
->size
;
4274 info
->time
+= e
->time
;
4276 info
->min_size
= info
->size_time_table
[0].size
;
4278 info
->call_size_time_table
.release ();
4279 if (node
->callees
|| node
->indirect_calls
)
4280 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4282 ~(clause_t
) (1 << ipa_predicate::false_condition
),
4284 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4285 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4289 /* This function performs intraprocedural analysis in NODE that is required to
4290 inline indirect calls. */
4293 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4295 ipa_analyze_node (node
);
4296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4298 ipa_print_node_params (dump_file
, node
);
4299 ipa_print_node_jump_functions (dump_file
, node
);
4304 /* Note function body size. */
4307 inline_analyze_function (struct cgraph_node
*node
)
4309 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4312 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4313 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4314 inline_indirect_intraprocedural_analysis (node
);
4315 compute_fn_summary (node
, false);
4318 struct cgraph_edge
*e
;
4319 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4320 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4321 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4322 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4329 /* Called when new function is inserted to callgraph late. */
4332 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4334 inline_analyze_function (node
);
4337 /* Note function body size. */
4340 ipa_fn_summary_generate (void)
4342 struct cgraph_node
*node
;
4344 FOR_EACH_DEFINED_FUNCTION (node
)
4345 if (DECL_STRUCT_FUNCTION (node
->decl
))
4346 node
->versionable
= tree_versionable_function_p (node
->decl
);
4348 ipa_fn_summary_alloc ();
4350 ipa_fn_summaries
->enable_insertion_hook ();
4352 ipa_register_cgraph_hooks ();
4354 FOR_EACH_DEFINED_FUNCTION (node
)
4356 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4357 || opt_for_fn (node
->decl
, optimize
)))
4358 inline_analyze_function (node
);
4362 /* Write inline summary for edge E to OB. */
4365 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4368 class ipa_call_summary
*es
= prevails
4369 ? ipa_call_summaries
->get_create (e
) : NULL
;
4373 int size
= streamer_read_uhwi (ib
);
4374 int time
= streamer_read_uhwi (ib
);
4375 int depth
= streamer_read_uhwi (ib
);
4379 es
->call_stmt_size
= size
;
4380 es
->call_stmt_time
= time
;
4381 es
->loop_depth
= depth
;
4384 bitpack_d bp
= streamer_read_bitpack (ib
);
4386 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4388 bp_unpack_value (&bp
, 1);
4392 edge_set_predicate (e
, &p
);
4393 length
= streamer_read_uhwi (ib
);
4395 && (e
->possibly_call_in_translation_unit_p ()
4396 /* Also stream in jump functions to builtins in hope that they
4397 will get fnspecs. */
4398 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4400 es
->param
.safe_grow_cleared (length
, true);
4401 for (i
= 0; i
< length
; i
++)
4403 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4404 es
->param
[i
].points_to_local_or_readonly_memory
4405 = streamer_read_uhwi (ib
);
4410 for (i
= 0; i
< length
; i
++)
4412 streamer_read_uhwi (ib
);
4413 streamer_read_uhwi (ib
);
4419 /* Stream in inline summaries from the section. */
4422 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4425 const struct lto_function_header
*header
=
4426 (const struct lto_function_header
*) data
;
4427 const int cfg_offset
= sizeof (struct lto_function_header
);
4428 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4429 const int string_offset
= main_offset
+ header
->main_size
;
4430 class data_in
*data_in
;
4431 unsigned int i
, count2
, j
;
4432 unsigned int f_count
;
4434 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4435 file_data
->mode_table
);
4438 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4439 header
->string_size
, vNULL
);
4440 f_count
= streamer_read_uhwi (&ib
);
4441 for (i
= 0; i
< f_count
; i
++)
4444 struct cgraph_node
*node
;
4445 class ipa_fn_summary
*info
;
4446 class ipa_node_params
*params_summary
;
4447 class ipa_size_summary
*size_info
;
4448 lto_symtab_encoder_t encoder
;
4449 struct bitpack_d bp
;
4450 struct cgraph_edge
*e
;
4453 index
= streamer_read_uhwi (&ib
);
4454 encoder
= file_data
->symtab_node_encoder
;
4455 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4457 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4458 params_summary
= node
->prevailing_p ()
4459 ? ipa_node_params_sum
->get (node
) : NULL
;
4460 size_info
= node
->prevailing_p ()
4461 ? ipa_size_summaries
->get_create (node
) : NULL
;
4463 int stack_size
= streamer_read_uhwi (&ib
);
4464 int size
= streamer_read_uhwi (&ib
);
4465 sreal time
= sreal::stream_in (&ib
);
4469 info
->estimated_stack_size
4470 = size_info
->estimated_self_stack_size
= stack_size
;
4471 size_info
->size
= size_info
->self_size
= size
;
4475 bp
= streamer_read_bitpack (&ib
);
4478 info
->inlinable
= bp_unpack_value (&bp
, 1);
4479 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4480 if (!lto_stream_offload_p
)
4481 info
->target_info
= streamer_read_uhwi (&ib
);
4485 bp_unpack_value (&bp
, 1);
4486 bp_unpack_value (&bp
, 1);
4487 if (!lto_stream_offload_p
)
4488 streamer_read_uhwi (&ib
);
4491 count2
= streamer_read_uhwi (&ib
);
4492 gcc_assert (!info
|| !info
->conds
);
4494 vec_safe_reserve_exact (info
->conds
, count2
);
4495 for (j
= 0; j
< count2
; j
++)
4498 unsigned int k
, count3
;
4499 c
.operand_num
= streamer_read_uhwi (&ib
);
4500 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4501 c
.type
= stream_read_tree (&ib
, data_in
);
4502 c
.val
= stream_read_tree (&ib
, data_in
);
4503 bp
= streamer_read_bitpack (&ib
);
4504 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4505 c
.by_ref
= bp_unpack_value (&bp
, 1);
4507 c
.offset
= streamer_read_uhwi (&ib
);
4508 count3
= streamer_read_uhwi (&ib
);
4511 vec_safe_reserve_exact (c
.param_ops
, count3
);
4513 ipa_set_param_used_by_ipa_predicates
4514 (params_summary
, c
.operand_num
, true);
4515 for (k
= 0; k
< count3
; k
++)
4517 struct expr_eval_op op
;
4518 enum gimple_rhs_class rhs_class
;
4519 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4520 op
.type
= stream_read_tree (&ib
, data_in
);
4521 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4523 case GIMPLE_UNARY_RHS
:
4525 op
.val
[0] = NULL_TREE
;
4526 op
.val
[1] = NULL_TREE
;
4529 case GIMPLE_BINARY_RHS
:
4530 case GIMPLE_TERNARY_RHS
:
4531 bp
= streamer_read_bitpack (&ib
);
4532 op
.index
= bp_unpack_value (&bp
, 2);
4533 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4534 if (rhs_class
== GIMPLE_BINARY_RHS
)
4535 op
.val
[1] = NULL_TREE
;
4537 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4541 fatal_error (UNKNOWN_LOCATION
,
4542 "invalid fnsummary in LTO stream");
4545 c
.param_ops
->quick_push (op
);
4548 info
->conds
->quick_push (c
);
4550 count2
= streamer_read_uhwi (&ib
);
4551 gcc_assert (!info
|| !info
->size_time_table
.length ());
4553 info
->size_time_table
.reserve_exact (count2
);
4554 for (j
= 0; j
< count2
; j
++)
4556 class size_time_entry e
;
4558 e
.size
= streamer_read_uhwi (&ib
);
4559 e
.time
= sreal::stream_in (&ib
);
4560 e
.exec_predicate
.stream_in (&ib
);
4561 e
.nonconst_predicate
.stream_in (&ib
);
4564 info
->size_time_table
.quick_push (e
);
4567 count2
= streamer_read_uhwi (&ib
);
4568 for (j
= 0; j
< count2
; j
++)
4571 sreal fcp_freq
= sreal::stream_in (&ib
);
4574 ipa_freqcounting_predicate fcp
;
4575 fcp
.predicate
= NULL
;
4576 set_hint_predicate (&fcp
.predicate
, p
);
4577 fcp
.freq
= fcp_freq
;
4578 vec_safe_push (info
->loop_iterations
, fcp
);
4581 count2
= streamer_read_uhwi (&ib
);
4582 for (j
= 0; j
< count2
; j
++)
4585 sreal fcp_freq
= sreal::stream_in (&ib
);
4588 ipa_freqcounting_predicate fcp
;
4589 fcp
.predicate
= NULL
;
4590 set_hint_predicate (&fcp
.predicate
, p
);
4591 fcp
.freq
= fcp_freq
;
4592 vec_safe_push (info
->loop_strides
, fcp
);
4595 count2
= streamer_read_uhwi (&ib
);
4597 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4598 for (j
= 0; j
< count2
; j
++)
4600 int parm
= streamer_read_uhwi (&ib
);
4602 info
->builtin_constant_p_parms
.quick_push (parm
);
4604 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4605 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4606 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4607 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4610 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4612 lto_data_in_delete (data_in
);
4616 /* Read inline summary. Jump functions are shared among ipa-cp
4617 and inliner, so when ipa-cp is active, we don't need to write them
4621 ipa_fn_summary_read (void)
4623 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4624 struct lto_file_decl_data
*file_data
;
4627 ipa_prop_read_jump_functions ();
4628 ipa_fn_summary_alloc ();
4630 while ((file_data
= file_data_vec
[j
++]))
4634 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4637 inline_read_section (file_data
, data
, len
);
4639 /* Fatal error here. We do not want to support compiling ltrans units
4640 with different version of compiler or different flags than the WPA
4641 unit, so this should never happen. */
4642 fatal_error (input_location
,
4643 "ipa inline summary is missing in input file");
4645 ipa_register_cgraph_hooks ();
4647 gcc_assert (ipa_fn_summaries
);
4648 ipa_fn_summaries
->enable_insertion_hook ();
4652 /* Write inline summary for edge E to OB. */
4655 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4657 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4660 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4661 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4662 streamer_write_uhwi (ob
, es
->loop_depth
);
4664 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4665 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4666 streamer_write_bitpack (&bp
);
4669 es
->predicate
->stream_out (ob
);
4671 streamer_write_uhwi (ob
, 0);
4672 streamer_write_uhwi (ob
, es
->param
.length ());
4673 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4675 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4676 streamer_write_uhwi (ob
, es
->param
[i
].points_to_local_or_readonly_memory
);
4681 /* Write inline summary for node in SET.
4682 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4683 active, we don't need to write them twice. */
4686 ipa_fn_summary_write (void)
4688 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4689 lto_symtab_encoder_iterator lsei
;
4690 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4691 unsigned int count
= 0;
4693 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4694 lsei_next_function_in_partition (&lsei
))
4696 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4697 if (cnode
->definition
&& !cnode
->alias
)
4700 streamer_write_uhwi (ob
, count
);
4702 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4703 lsei_next_function_in_partition (&lsei
))
4705 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4706 if (cnode
->definition
&& !cnode
->alias
)
4708 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4709 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4710 struct bitpack_d bp
;
4711 struct cgraph_edge
*edge
;
4714 struct condition
*c
;
4716 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4717 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4718 streamer_write_hwi (ob
, size_info
->self_size
);
4719 info
->time
.stream_out (ob
);
4720 bp
= bitpack_create (ob
->main_stream
);
4721 bp_pack_value (&bp
, info
->inlinable
, 1);
4722 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4723 streamer_write_bitpack (&bp
);
4724 if (!lto_stream_offload_p
)
4725 streamer_write_uhwi (ob
, info
->target_info
);
4726 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4727 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4730 struct expr_eval_op
*op
;
4732 streamer_write_uhwi (ob
, c
->operand_num
);
4733 streamer_write_uhwi (ob
, c
->code
);
4734 stream_write_tree (ob
, c
->type
, true);
4735 stream_write_tree (ob
, c
->val
, true);
4736 bp
= bitpack_create (ob
->main_stream
);
4737 bp_pack_value (&bp
, c
->agg_contents
, 1);
4738 bp_pack_value (&bp
, c
->by_ref
, 1);
4739 streamer_write_bitpack (&bp
);
4740 if (c
->agg_contents
)
4741 streamer_write_uhwi (ob
, c
->offset
);
4742 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4743 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4745 streamer_write_uhwi (ob
, op
->code
);
4746 stream_write_tree (ob
, op
->type
, true);
4749 bp
= bitpack_create (ob
->main_stream
);
4750 bp_pack_value (&bp
, op
->index
, 2);
4751 streamer_write_bitpack (&bp
);
4752 stream_write_tree (ob
, op
->val
[0], true);
4754 stream_write_tree (ob
, op
->val
[1], true);
4758 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4759 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4761 streamer_write_uhwi (ob
, e
->size
);
4762 e
->time
.stream_out (ob
);
4763 e
->exec_predicate
.stream_out (ob
);
4764 e
->nonconst_predicate
.stream_out (ob
);
4766 ipa_freqcounting_predicate
*fcp
;
4767 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4768 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4770 fcp
->predicate
->stream_out (ob
);
4771 fcp
->freq
.stream_out (ob
);
4773 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4774 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4776 fcp
->predicate
->stream_out (ob
);
4777 fcp
->freq
.stream_out (ob
);
4779 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4781 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4783 streamer_write_uhwi (ob
, ip
);
4784 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4785 write_ipa_call_summary (ob
, edge
);
4786 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4787 write_ipa_call_summary (ob
, edge
);
4790 streamer_write_char_stream (ob
->main_stream
, 0);
4791 produce_asm (ob
, NULL
);
4792 destroy_output_block (ob
);
4794 ipa_prop_write_jump_functions ();
4798 /* Release function summary. */
4801 ipa_free_fn_summary (void)
4803 if (!ipa_call_summaries
)
4805 ggc_delete (ipa_fn_summaries
);
4806 ipa_fn_summaries
= NULL
;
4807 delete ipa_call_summaries
;
4808 ipa_call_summaries
= NULL
;
4809 edge_predicate_pool
.release ();
4810 /* During IPA this is one of largest datastructures to release. */
4815 /* Release function summary. */
4818 ipa_free_size_summary (void)
4820 if (!ipa_size_summaries
)
4822 delete ipa_size_summaries
;
4823 ipa_size_summaries
= NULL
;
4828 const pass_data pass_data_local_fn_summary
=
4830 GIMPLE_PASS
, /* type */
4831 "local-fnsummary", /* name */
4832 OPTGROUP_INLINE
, /* optinfo_flags */
4833 TV_INLINE_PARAMETERS
, /* tv_id */
4834 0, /* properties_required */
4835 0, /* properties_provided */
4836 0, /* properties_destroyed */
4837 0, /* todo_flags_start */
4838 0, /* todo_flags_finish */
4841 class pass_local_fn_summary
: public gimple_opt_pass
4844 pass_local_fn_summary (gcc::context
*ctxt
)
4845 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4848 /* opt_pass methods: */
4849 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
4850 virtual unsigned int execute (function
*)
4852 return compute_fn_summary_for_current ();
4855 }; // class pass_local_fn_summary
4860 make_pass_local_fn_summary (gcc::context
*ctxt
)
4862 return new pass_local_fn_summary (ctxt
);
4866 /* Free inline summary. */
4870 const pass_data pass_data_ipa_free_fn_summary
=
4872 SIMPLE_IPA_PASS
, /* type */
4873 "free-fnsummary", /* name */
4874 OPTGROUP_NONE
, /* optinfo_flags */
4875 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4876 0, /* properties_required */
4877 0, /* properties_provided */
4878 0, /* properties_destroyed */
4879 0, /* todo_flags_start */
4880 0, /* todo_flags_finish */
4883 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
4886 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4887 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
4891 /* opt_pass methods: */
4892 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
4893 void set_pass_param (unsigned int n
, bool param
)
4895 gcc_assert (n
== 0);
4898 virtual bool gate (function
*) { return true; }
4899 virtual unsigned int execute (function
*)
4901 ipa_free_fn_summary ();
4902 /* Free ipa-prop structures if they are no longer needed. */
4903 ipa_free_all_structures_after_iinln ();
4905 ipa_free_size_summary ();
4911 }; // class pass_ipa_free_fn_summary
4915 simple_ipa_opt_pass
*
4916 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4918 return new pass_ipa_free_fn_summary (ctxt
);
4923 const pass_data pass_data_ipa_fn_summary
=
4925 IPA_PASS
, /* type */
4926 "fnsummary", /* name */
4927 OPTGROUP_INLINE
, /* optinfo_flags */
4928 TV_IPA_FNSUMMARY
, /* tv_id */
4929 0, /* properties_required */
4930 0, /* properties_provided */
4931 0, /* properties_destroyed */
4932 0, /* todo_flags_start */
4933 ( TODO_dump_symtab
), /* todo_flags_finish */
4936 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4939 pass_ipa_fn_summary (gcc::context
*ctxt
)
4940 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4941 ipa_fn_summary_generate
, /* generate_summary */
4942 ipa_fn_summary_write
, /* write_summary */
4943 ipa_fn_summary_read
, /* read_summary */
4944 NULL
, /* write_optimization_summary */
4945 NULL
, /* read_optimization_summary */
4946 NULL
, /* stmt_fixup */
4947 0, /* function_transform_todo_flags_start */
4948 NULL
, /* function_transform */
4949 NULL
) /* variable_transform */
4952 /* opt_pass methods: */
4953 virtual unsigned int execute (function
*) { return 0; }
4955 }; // class pass_ipa_fn_summary
4960 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4962 return new pass_ipa_fn_summary (ctxt
);
4965 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
4966 within the same process. For use by toplev::finalize. */
4969 ipa_fnsummary_cc_finalize (void)
4971 ipa_free_fn_summary ();