1 /* Function summary pass.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
65 #include "tree-streamer.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-iterator.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
79 #include "ipa-fnsummary.h"
81 #include "tree-scalar-evolution.h"
82 #include "ipa-utils.h"
83 #include "cfgexpand.h"
85 #include "stringpool.h"
87 #include "tree-into-ssa.h"
88 #include "symtab-clones.h"
89 #include "gimple-range.h"
93 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
94 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
95 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
97 /* Edge predicates goes here. */
98 static object_allocator
<ipa_predicate
> edge_predicate_pool ("edge predicates");
101 /* Dump IPA hints. */
103 ipa_dump_hints (FILE *f
, ipa_hints hints
)
107 fprintf (f
, "IPA hints:");
108 if (hints
& INLINE_HINT_indirect_call
)
110 hints
&= ~INLINE_HINT_indirect_call
;
111 fprintf (f
, " indirect_call");
113 if (hints
& INLINE_HINT_loop_iterations
)
115 hints
&= ~INLINE_HINT_loop_iterations
;
116 fprintf (f
, " loop_iterations");
118 if (hints
& INLINE_HINT_loop_stride
)
120 hints
&= ~INLINE_HINT_loop_stride
;
121 fprintf (f
, " loop_stride");
123 if (hints
& INLINE_HINT_same_scc
)
125 hints
&= ~INLINE_HINT_same_scc
;
126 fprintf (f
, " same_scc");
128 if (hints
& INLINE_HINT_in_scc
)
130 hints
&= ~INLINE_HINT_in_scc
;
131 fprintf (f
, " in_scc");
133 if (hints
& INLINE_HINT_cross_module
)
135 hints
&= ~INLINE_HINT_cross_module
;
136 fprintf (f
, " cross_module");
138 if (hints
& INLINE_HINT_declared_inline
)
140 hints
&= ~INLINE_HINT_declared_inline
;
141 fprintf (f
, " declared_inline");
143 if (hints
& INLINE_HINT_known_hot
)
145 hints
&= ~INLINE_HINT_known_hot
;
146 fprintf (f
, " known_hot");
148 if (hints
& INLINE_HINT_builtin_constant_p
)
150 hints
&= ~INLINE_HINT_builtin_constant_p
;
151 fprintf (f
, " builtin_constant_p");
157 /* Record SIZE and TIME to SUMMARY.
158 The accounted code will be executed when EXEC_PRED is true.
159 When NONCONST_PRED is false the code will evaluate to constant and
160 will get optimized out in specialized clones of the function.
161 If CALL is true account to call_size_time_table rather than
165 ipa_fn_summary::account_size_time (int size
, sreal time
,
166 const ipa_predicate
&exec_pred
,
167 const ipa_predicate
&nonconst_pred_in
,
173 ipa_predicate nonconst_pred
;
174 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
176 if (exec_pred
== false)
179 nonconst_pred
= nonconst_pred_in
& exec_pred
;
181 if (nonconst_pred
== false)
184 /* We need to create initial empty unconditional clause, but otherwise
185 we don't need to account empty times and sizes. */
186 if (!size
&& time
== 0 && table
->length ())
189 /* Only for calls we are unaccounting what we previously recorded. */
190 gcc_checking_assert (time
>= 0 || call
);
192 for (i
= 0; table
->iterate (i
, &e
); i
++)
193 if (e
->exec_predicate
== exec_pred
194 && e
->nonconst_predicate
== nonconst_pred
)
199 if (i
== max_size_time_table_size
)
204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
206 "\t\tReached limit on number of entries, "
207 "ignoring the predicate.");
209 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
212 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
213 ((double) size
) / ipa_fn_summary::size_scale
,
214 (time
.to_double ()), found
? "" : "new ");
215 exec_pred
.dump (dump_file
, conds
, 0);
216 if (exec_pred
!= nonconst_pred
)
218 fprintf (dump_file
, " nonconst:");
219 nonconst_pred
.dump (dump_file
, conds
);
222 fprintf (dump_file
, "\n");
226 class size_time_entry new_entry
;
227 new_entry
.size
= size
;
228 new_entry
.time
= time
;
229 new_entry
.exec_predicate
= exec_pred
;
230 new_entry
.nonconst_predicate
= nonconst_pred
;
232 call_size_time_table
.safe_push (new_entry
);
234 size_time_table
.safe_push (new_entry
);
240 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
241 /* Tolerate small roundoff issues. */
247 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
249 static struct cgraph_edge
*
250 redirect_to_unreachable (struct cgraph_edge
*e
)
252 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
253 struct cgraph_node
*target
254 = cgraph_node::get_create (builtin_decl_unreachable ());
257 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
259 e
= cgraph_edge::make_direct (e
, target
);
261 e
->redirect_callee (target
);
262 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
263 e
->inline_failed
= CIF_UNREACHABLE
;
264 e
->count
= profile_count::zero ();
265 es
->call_stmt_size
= 0;
266 es
->call_stmt_time
= 0;
268 callee
->remove_symbol_and_inline_clones ();
272 /* Set predicate for edge E. */
275 edge_set_predicate (struct cgraph_edge
*e
, ipa_predicate
*predicate
)
277 /* If the edge is determined to be never executed, redirect it
278 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
280 if (predicate
&& *predicate
== false
281 /* When handling speculative edges, we need to do the redirection
282 just once. Do it always on the direct edge, so we do not
283 attempt to resolve speculation while duplicating the edge. */
284 && (!e
->speculative
|| e
->callee
))
285 e
= redirect_to_unreachable (e
);
287 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
288 if (predicate
&& *predicate
!= true)
291 es
->predicate
= edge_predicate_pool
.allocate ();
292 *es
->predicate
= *predicate
;
297 edge_predicate_pool
.remove (es
->predicate
);
298 es
->predicate
= NULL
;
302 /* Set predicate for hint *P. */
305 set_hint_predicate (ipa_predicate
**p
, ipa_predicate new_predicate
)
307 if (new_predicate
== false || new_predicate
== true)
310 edge_predicate_pool
.remove (*p
);
316 *p
= edge_predicate_pool
.allocate ();
321 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
322 Otherwise add a new item to the vector with this predicate and frerq equal
323 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
324 in which case the function does nothing. */
327 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
328 const ipa_predicate
&new_predicate
, sreal add_freq
,
329 unsigned max_num_predicates
)
331 if (new_predicate
== false || new_predicate
== true)
333 ipa_freqcounting_predicate
*f
;
334 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
335 if (new_predicate
== f
->predicate
)
340 if (vec_safe_length (*v
) >= max_num_predicates
)
341 /* Too many different predicates to account for. */
344 ipa_freqcounting_predicate fcp
;
345 fcp
.predicate
= NULL
;
346 set_hint_predicate (&fcp
.predicate
, new_predicate
);
348 vec_safe_push (*v
, fcp
);
352 /* Compute what conditions may or may not hold given information about
353 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
354 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
355 copy when called in a given context. It is a bitmask of conditions. Bit
356 0 means that condition is known to be false, while bit 1 means that condition
357 may or may not be true. These differs - for example NOT_INLINED condition
358 is always false in the second and also builtin_constant_p tests cannot use
359 the fact that parameter is indeed a constant.
361 When INLINE_P is true, assume that we are inlining. AVAL contains known
362 information about argument values. The function does not modify its content
363 and so AVALs could also be of type ipa_call_arg_values but so far all
364 callers work with the auto version and so we avoid the conversion for
367 ERROR_MARK value of an argument means compile time invariant. */
370 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
372 ipa_auto_call_arg_values
*avals
,
373 clause_t
*ret_clause
,
374 clause_t
*ret_nonspec_clause
,
375 ipa_call_summary
*es
)
377 clause_t clause
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
378 clause_t nonspec_clause
= 1 << ipa_predicate::not_inlined_condition
;
379 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
383 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
388 struct expr_eval_op
*op
;
390 if (c
->code
== ipa_predicate::not_sra_candidate
)
394 || (int)es
->param
.length () <= c
->operand_num
395 || !es
->param
[c
->operand_num
].points_to_possible_sra_candidate
)
396 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
397 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
403 if (c
->code
== ipa_predicate::changed
405 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
408 if (tree sval
= avals
->safe_sval_at (c
->operand_num
))
409 val
= ipa_find_agg_cst_from_init (sval
, c
->offset
, c
->by_ref
);
412 ipa_argagg_value_list
avs (avals
);
413 val
= avs
.get_value (c
->operand_num
, c
->offset
/ BITS_PER_UNIT
,
419 val
= avals
->safe_sval_at (c
->operand_num
);
420 if (val
&& val
== error_mark_node
421 && c
->code
!= ipa_predicate::changed
)
426 && (c
->code
== ipa_predicate::changed
427 || c
->code
== ipa_predicate::is_not_constant
))
429 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
430 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
433 if (c
->code
== ipa_predicate::changed
)
435 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
439 if (c
->code
== ipa_predicate::is_not_constant
)
441 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
445 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
447 if (c
->type
!= TREE_TYPE (val
))
448 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
449 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
454 val
= fold_unary (op
->code
, op
->type
, val
);
455 else if (!op
->val
[1])
456 val
= fold_binary (op
->code
, op
->type
,
457 op
->index
? op
->val
[0] : val
,
458 op
->index
? val
: op
->val
[0]);
459 else if (op
->index
== 0)
460 val
= fold_ternary (op
->code
, op
->type
,
461 val
, op
->val
[0], op
->val
[1]);
462 else if (op
->index
== 1)
463 val
= fold_ternary (op
->code
, op
->type
,
464 op
->val
[0], val
, op
->val
[1]);
465 else if (op
->index
== 2)
466 val
= fold_ternary (op
->code
, op
->type
,
467 op
->val
[0], op
->val
[1], val
);
473 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
476 if (res
&& integer_zerop (res
))
478 if (res
&& integer_onep (res
))
480 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
482 |= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
486 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
488 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
490 Value_Range
vr (avals
->m_known_value_ranges
[c
->operand_num
]);
491 if (!vr
.undefined_p ()
493 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
495 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
496 range_cast (vr
, c
->type
);
498 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
500 if (vr
.varying_p () || vr
.undefined_p ())
503 Value_Range
res (op
->type
);
506 Value_Range
varying (op
->type
);
507 varying
.set_varying (op
->type
);
508 range_op_handler
handler (op
->code
);
510 || !res
.supports_type_p (op
->type
)
511 || !handler
.fold_range (res
, op
->type
, vr
, varying
))
512 res
.set_varying (op
->type
);
514 else if (!op
->val
[1])
516 Value_Range
op0 (op
->type
);
517 range_op_handler
handler (op
->code
);
519 ipa_range_set_and_normalize (op0
, op
->val
[0]);
522 || !res
.supports_type_p (op
->type
)
523 || !handler
.fold_range (res
, op
->type
,
524 op
->index
? op0
: vr
,
525 op
->index
? vr
: op0
))
526 res
.set_varying (op
->type
);
529 res
.set_varying (op
->type
);
532 if (!vr
.varying_p () && !vr
.undefined_p ())
535 Value_Range
val_vr (TREE_TYPE (c
->val
));
536 range_op_handler
handler (c
->code
);
538 ipa_range_set_and_normalize (val_vr
, c
->val
);
541 || !val_vr
.supports_type_p (TREE_TYPE (c
->val
))
542 || !handler
.fold_range (res
, boolean_type_node
, vr
, val_vr
))
543 res
.set_varying (boolean_type_node
);
551 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
552 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
554 *ret_clause
= clause
;
555 if (ret_nonspec_clause
)
556 *ret_nonspec_clause
= nonspec_clause
;
559 /* Return true if VRP will be exectued on the function.
560 We do not want to anticipate optimizations that will not happen.
562 FIXME: This can be confused with -fdisable and debug counters and thus
563 it should not be used for correctness (only to make heuristics work).
564 This means that inliner should do its own optimizations of expressions
565 that it predicts to be constant so wrong code can not be triggered by
566 builtin_constant_p. */
569 vrp_will_run_p (struct cgraph_node
*node
)
571 return (opt_for_fn (node
->decl
, optimize
)
572 && !opt_for_fn (node
->decl
, optimize_debug
)
573 && opt_for_fn (node
->decl
, flag_tree_vrp
));
576 /* Similarly about FRE. */
579 fre_will_run_p (struct cgraph_node
*node
)
581 return (opt_for_fn (node
->decl
, optimize
)
582 && !opt_for_fn (node
->decl
, optimize_debug
)
583 && opt_for_fn (node
->decl
, flag_tree_fre
));
586 /* Work out what conditions might be true at invocation of E.
587 Compute costs for inlined edge if INLINE_P is true.
589 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
590 (if non-NULL) conditions evaluated for nonspecialized clone called
593 Vectors in AVALS will be populated with useful known information about
594 argument values - information not known to have any uses will be omitted -
595 except for m_known_contexts which will only be calculated if
596 COMPUTE_CONTEXTS is true. */
599 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
600 clause_t
*clause_ptr
,
601 clause_t
*nonspec_clause_ptr
,
602 ipa_auto_call_arg_values
*avals
,
603 bool compute_contexts
)
605 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
606 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
607 class ipa_edge_args
*args
;
608 class ipa_call_summary
*es
= NULL
;
611 *clause_ptr
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
613 if (ipa_node_params_sum
614 && !e
->call_stmt_cannot_inline_p
615 && (info
->conds
|| compute_contexts
)
616 && (args
= ipa_edge_args_sum
->get (e
)) != NULL
)
618 struct cgraph_node
*caller
;
619 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
620 int i
, count
= ipa_get_cs_argument_count (args
);
621 es
= ipa_call_summaries
->get (e
);
625 if (e
->caller
->inlined_to
)
626 caller
= e
->caller
->inlined_to
;
629 caller_parms_info
= ipa_node_params_sum
->get (caller
);
630 callee_pi
= ipa_node_params_sum
->get (callee
);
632 /* Watch for thunks. */
634 /* Watch for variadic functions. */
635 count
= MIN (count
, ipa_get_param_count (callee_pi
));
639 for (i
= 0; i
< count
; i
++)
641 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
643 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
644 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
646 /* Determine if we know constant value of the parameter. */
647 tree type
= ipa_get_type (callee_pi
, i
);
648 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
, type
);
650 if (!cst
&& e
->call_stmt
651 && i
< (int)gimple_call_num_args (e
->call_stmt
))
653 cst
= gimple_call_arg (e
->call_stmt
, i
);
654 if (!is_gimple_min_invariant (cst
))
659 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
660 if (!avals
->m_known_vals
.length ())
661 avals
->m_known_vals
.safe_grow_cleared (count
, true);
662 avals
->m_known_vals
[i
] = cst
;
664 else if (inline_p
&& !es
->param
[i
].change_prob
)
666 if (!avals
->m_known_vals
.length ())
667 avals
->m_known_vals
.safe_grow_cleared (count
, true);
668 avals
->m_known_vals
[i
] = error_mark_node
;
671 /* If we failed to get simple constant, try value range. */
672 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
673 && vrp_will_run_p (caller
)
674 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
676 Value_Range
vr (type
);
678 ipa_value_range_from_jfunc (vr
, caller_parms_info
, e
, jf
, type
);
679 if (!vr
.undefined_p () && !vr
.varying_p ())
681 if (!avals
->m_known_value_ranges
.length ())
683 avals
->m_known_value_ranges
.safe_grow (count
, true);
684 for (int i
= 0; i
< count
; ++i
)
685 new (&avals
->m_known_value_ranges
[i
])
688 avals
->m_known_value_ranges
[i
] = vr
;
692 /* Determine known aggregate values. */
693 if (fre_will_run_p (caller
))
694 ipa_push_agg_values_from_jfunc (caller_parms_info
,
696 &avals
->m_known_aggs
);
699 /* For calls used in polymorphic calls we further determine
700 polymorphic call context. */
702 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
704 ipa_polymorphic_call_context
705 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
706 if (!ctx
.useless_p ())
708 if (!avals
->m_known_contexts
.length ())
709 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
710 avals
->m_known_contexts
[i
]
711 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
716 gcc_assert (!count
|| callee
->thunk
);
718 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
720 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
722 for (i
= 0; i
< count
; i
++)
724 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
725 if (!is_gimple_min_invariant (cst
))
729 if (!avals
->m_known_vals
.length ())
730 avals
->m_known_vals
.safe_grow_cleared (count
, true);
731 avals
->m_known_vals
[i
] = cst
;
736 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
737 nonspec_clause_ptr
, es
);
741 /* Allocate the function summary. */
744 ipa_fn_summary_alloc (void)
746 gcc_checking_assert (!ipa_fn_summaries
);
747 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
748 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
749 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
752 ipa_call_summary::~ipa_call_summary ()
755 edge_predicate_pool
.remove (predicate
);
760 ipa_fn_summary::~ipa_fn_summary ()
762 unsigned len
= vec_safe_length (loop_iterations
);
763 for (unsigned i
= 0; i
< len
; i
++)
764 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
765 len
= vec_safe_length (loop_strides
);
766 for (unsigned i
= 0; i
< len
; i
++)
767 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
769 call_size_time_table
.release ();
770 vec_free (loop_iterations
);
771 vec_free (loop_strides
);
772 builtin_constant_p_parms
.release ();
776 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
779 for (e
= node
->callees
; e
; e
= e
->next_callee
)
780 ipa_call_summaries
->remove (e
);
781 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
782 ipa_call_summaries
->remove (e
);
785 /* Duplicate predicates in loop hint vector, allocating memory for them and
786 remove and deallocate any uninteresting (true or false) ones. Return the
789 static vec
<ipa_freqcounting_predicate
, va_gc
> *
790 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
791 clause_t possible_truths
)
793 if (vec_safe_length (v
) == 0)
796 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
797 int len
= res
->length();
798 for (int i
= len
- 1; i
>= 0; i
--)
800 ipa_predicate new_predicate
801 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
802 /* We do not want to free previous predicate; it is used by node
804 (*res
)[i
].predicate
= NULL
;
805 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
807 if (!(*res
)[i
].predicate
)
808 res
->unordered_remove (i
);
815 /* Hook that is called by cgraph.cc when a node is duplicated. */
817 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
819 ipa_fn_summary
*src_info
,
820 ipa_fn_summary
*info
)
822 new (info
) ipa_fn_summary (*src_info
);
823 /* TODO: as an optimization, we may avoid copying conditions
824 that are known to be false or true. */
825 info
->conds
= vec_safe_copy (info
->conds
);
827 clone_info
*cinfo
= clone_info::get (dst
);
828 /* When there are any replacements in the function body, see if we can figure
829 out that something was optimized out. */
830 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
832 /* Use SRC parm info since it may not be copied yet. */
833 ipa_node_params
*parms_info
= ipa_node_params_sum
->get (src
);
834 ipa_auto_call_arg_values avals
;
835 int count
= ipa_get_param_count (parms_info
);
837 clause_t possible_truths
;
838 ipa_predicate true_pred
= true;
840 int optimized_out_size
= 0;
841 bool inlined_to_p
= false;
842 struct cgraph_edge
*edge
, *next
;
844 info
->size_time_table
.release ();
845 avals
.m_known_vals
.safe_grow_cleared (count
, true);
846 for (i
= 0; i
< count
; i
++)
848 struct ipa_replace_map
*r
;
850 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
852 if (r
->parm_num
== i
)
854 avals
.m_known_vals
[i
] = r
->new_tree
;
859 evaluate_conditions_for_known_args (dst
, false,
862 /* We are going to specialize,
863 so ignore nonspec truths. */
867 info
->account_size_time (0, 0, true_pred
, true_pred
);
869 /* Remap size_time vectors.
870 Simplify the predicate by pruning out alternatives that are known
872 TODO: as on optimization, we can also eliminate conditions known
874 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
876 ipa_predicate new_exec_pred
;
877 ipa_predicate new_nonconst_pred
;
878 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
880 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
882 if (new_exec_pred
== false || new_nonconst_pred
== false)
883 optimized_out_size
+= e
->size
;
885 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
889 /* Remap edge predicates with the same simplification as above.
890 Also copy constantness arrays. */
891 for (edge
= dst
->callees
; edge
; edge
= next
)
893 ipa_predicate new_predicate
;
894 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
895 next
= edge
->next_callee
;
897 if (!edge
->inline_failed
)
901 new_predicate
= es
->predicate
->remap_after_duplication
903 if (new_predicate
== false && *es
->predicate
!= false)
904 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
905 edge_set_predicate (edge
, &new_predicate
);
908 /* Remap indirect edge predicates with the same simplification as above.
909 Also copy constantness arrays. */
910 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
912 ipa_predicate new_predicate
;
913 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
914 next
= edge
->next_callee
;
916 gcc_checking_assert (edge
->inline_failed
);
919 new_predicate
= es
->predicate
->remap_after_duplication
921 if (new_predicate
== false && *es
->predicate
!= false)
923 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
924 edge_set_predicate (edge
, &new_predicate
);
926 info
->loop_iterations
927 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
930 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
932 if (info
->builtin_constant_p_parms
.length())
934 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
936 info
->builtin_constant_p_parms
= vNULL
;
937 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
938 if (!avals
.m_known_vals
[ip
])
939 info
->builtin_constant_p_parms
.safe_push (ip
);
942 /* If inliner or someone after inliner will ever start producing
943 non-trivial clones, we will get trouble with lack of information
944 about updating self sizes, because size vectors already contains
945 sizes of the callees. */
946 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
950 info
->size_time_table
= src_info
->size_time_table
.copy ();
951 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
952 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
954 info
->builtin_constant_p_parms
955 = info
->builtin_constant_p_parms
.copy ();
957 ipa_freqcounting_predicate
*f
;
958 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
960 ipa_predicate p
= *f
->predicate
;
962 set_hint_predicate (&f
->predicate
, p
);
964 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
966 ipa_predicate p
= *f
->predicate
;
968 set_hint_predicate (&f
->predicate
, p
);
971 if (!dst
->inlined_to
)
972 ipa_update_overall_fn_summary (dst
);
976 /* Hook that is called by cgraph.cc when a node is duplicated. */
979 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
980 struct cgraph_edge
*dst
,
981 class ipa_call_summary
*srcinfo
,
982 class ipa_call_summary
*info
)
984 new (info
) ipa_call_summary (*srcinfo
);
985 info
->predicate
= NULL
;
986 edge_set_predicate (dst
, srcinfo
->predicate
);
987 info
->param
= srcinfo
->param
.copy ();
988 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
990 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
991 - eni_size_weights
.call_cost
);
992 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
993 - eni_time_weights
.call_cost
);
997 /* Dump edge summaries associated to NODE and recursively to all clones.
1001 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
1002 class ipa_fn_summary
*info
)
1004 struct cgraph_edge
*edge
;
1005 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
1007 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1008 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
1012 "%*s%s %s\n%*s freq:%4.2f",
1013 indent
, "", callee
->dump_name (),
1014 !edge
->inline_failed
1015 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1016 indent
, "", edge
->sreal_frequency ().to_double ());
1018 if (cross_module_call_p (edge
))
1019 fprintf (f
, " cross module");
1022 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1023 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1025 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1026 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1028 fprintf (f
, " callee size:%2i stack:%2i",
1029 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1030 (int) s
->estimated_stack_size
);
1032 if (es
&& es
->predicate
)
1034 fprintf (f
, " predicate: ");
1035 es
->predicate
->dump (f
, info
->conds
);
1039 if (es
&& es
->param
.exists ())
1040 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1042 int prob
= es
->param
[i
].change_prob
;
1045 fprintf (f
, "%*s op%i is compile time invariant\n",
1047 else if (prob
!= REG_BR_PROB_BASE
)
1048 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1049 prob
* 100.0 / REG_BR_PROB_BASE
);
1050 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1051 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1053 if (es
->param
[i
].points_to_possible_sra_candidate
)
1054 fprintf (f
, "%*s op%i points to possible sra candidate\n",
1057 if (!edge
->inline_failed
)
1059 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1060 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1062 (int) ipa_get_stack_frame_offset (callee
),
1063 (int) ss
->estimated_self_stack_size
);
1064 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1067 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1069 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1070 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1074 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1075 es
->call_stmt_time
);
1078 fprintf (f
, "predicate: ");
1079 es
->predicate
->dump (f
, info
->conds
);
1088 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1090 if (node
->definition
)
1092 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1093 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1098 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1099 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1100 fprintf (f
, " always_inline");
1102 fprintf (f
, " inlinable");
1103 if (s
->fp_expressions
)
1104 fprintf (f
, " fp_expression");
1105 if (s
->builtin_constant_p_parms
.length ())
1107 fprintf (f
, " builtin_constant_p_parms");
1108 for (unsigned int i
= 0;
1109 i
< s
->builtin_constant_p_parms
.length (); i
++)
1110 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1112 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1113 fprintf (f
, " self size: %i\n", ss
->self_size
);
1114 fprintf (f
, " global size: %i\n", ss
->size
);
1115 fprintf (f
, " min size: %i\n", s
->min_size
);
1116 fprintf (f
, " self stack: %i\n",
1117 (int) ss
->estimated_self_stack_size
);
1118 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1120 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1122 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1123 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1125 fprintf (f
, " size:%f, time:%f",
1126 (double) e
->size
/ ipa_fn_summary::size_scale
,
1127 e
->time
.to_double ());
1128 if (e
->exec_predicate
!= true)
1130 fprintf (f
, ", executed if:");
1131 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1133 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1135 fprintf (f
, ", nonconst if:");
1136 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1140 ipa_freqcounting_predicate
*fcp
;
1141 bool first_fcp
= true;
1142 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1146 fprintf (f
, " loop iterations:");
1149 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1150 fcp
->predicate
->dump (f
, s
->conds
);
1153 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1157 fprintf (f
, " loop strides:");
1160 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1161 fcp
->predicate
->dump (f
, s
->conds
);
1163 fprintf (f
, " calls:\n");
1164 dump_ipa_call_summary (f
, 4, node
, s
);
1167 fprintf (f
, " target_info: %x\n", s
->target_info
);
1170 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
1175 ipa_debug_fn_summary (struct cgraph_node
*node
)
1177 ipa_dump_fn_summary (stderr
, node
);
1181 ipa_dump_fn_summaries (FILE *f
)
1183 struct cgraph_node
*node
;
1185 FOR_EACH_DEFINED_FUNCTION (node
)
1186 if (!node
->inlined_to
)
1187 ipa_dump_fn_summary (f
, node
);
1190 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1191 boolean variable pointed to by DATA. */
1194 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1197 bool *b
= (bool *) data
;
1202 /* If OP refers to value of function parameter, return the corresponding
1203 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1204 PARM_DECL) will be stored to *SIZE_P in that case too. */
1207 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1210 /* SSA_NAME referring to parm default def? */
1211 if (TREE_CODE (op
) == SSA_NAME
1212 && SSA_NAME_IS_DEFAULT_DEF (op
)
1213 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1216 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1217 return SSA_NAME_VAR (op
);
1219 /* Non-SSA parm reference? */
1220 if (TREE_CODE (op
) == PARM_DECL
1221 && fbi
->aa_walk_budget
> 0)
1223 bool modified
= false;
1226 ao_ref_init (&refd
, op
);
1227 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1228 mark_modified
, &modified
, NULL
, NULL
,
1229 fbi
->aa_walk_budget
);
1232 fbi
->aa_walk_budget
= 0;
1235 fbi
->aa_walk_budget
-= walked
;
1239 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1246 /* If OP refers to value of function parameter, return the corresponding
1247 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1248 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1249 stored to *SIZE_P in that case too. */
1252 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1255 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1259 if (TREE_CODE (op
) == SSA_NAME
1260 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1261 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1262 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1263 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1268 /* If OP refers to a value of a function parameter or value loaded from an
1269 aggregate passed to a parameter (either by value or reference), return TRUE
1270 and store the number of the parameter to *INDEX_P, the access size into
1271 *SIZE_P, and information whether and how it has been loaded from an
1272 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1273 statement in which OP is used or loaded. */
1276 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1277 gimple
*stmt
, tree op
, int *index_p
,
1279 struct agg_position_info
*aggpos
)
1281 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1283 gcc_checking_assert (aggpos
);
1286 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1289 aggpos
->agg_contents
= false;
1290 aggpos
->by_ref
= false;
1294 if (TREE_CODE (op
) == SSA_NAME
)
1296 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1297 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1299 stmt
= SSA_NAME_DEF_STMT (op
);
1300 op
= gimple_assign_rhs1 (stmt
);
1301 if (!REFERENCE_CLASS_P (op
))
1302 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1306 aggpos
->agg_contents
= true;
1307 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1308 stmt
, op
, index_p
, &aggpos
->offset
,
1309 size_p
, &aggpos
->by_ref
);
1312 /* If stmt is simple load or store of value pointed to by a function parmaeter,
1313 return its index. */
1316 load_or_store_of_ptr_parameter (ipa_func_body_info
*fbi
, gimple
*stmt
)
1320 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
1324 if (gimple_assign_load_p (stmt
))
1325 param
= gimple_assign_rhs1 (stmt
);
1326 else if (gimple_store_p (stmt
))
1327 param
= gimple_assign_lhs (stmt
);
1330 tree base
= get_base_address (param
);
1331 if (TREE_CODE (base
) != MEM_REF
1332 || TREE_CODE (TREE_OPERAND (base
, 0)) != SSA_NAME
1333 || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base
, 0)))
1335 tree p
= SSA_NAME_VAR (TREE_OPERAND (base
, 0));
1336 if (TREE_CODE (p
) != PARM_DECL
)
1338 return ipa_get_param_decl_index (fbi
->info
, p
);
1341 /* See if statement might disappear after inlining.
1342 0 - means not eliminated
1343 1 - half of statements goes away
1344 2 - for sure it is eliminated.
1345 We are not terribly sophisticated, basically looking for simple abstraction
1346 penalty wrappers. */
1349 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1351 enum gimple_code code
= gimple_code (stmt
);
1352 enum tree_code rhs_code
;
1362 if (gimple_num_ops (stmt
) != 2)
1365 rhs_code
= gimple_assign_rhs_code (stmt
);
1367 /* Casts of parameters, loads from parameters passed by reference
1368 and stores to return value or parameters are often free after
1369 inlining due to SRA and further combining.
1370 Assume that half of statements goes away. */
1371 if (CONVERT_EXPR_CODE_P (rhs_code
)
1372 || rhs_code
== VIEW_CONVERT_EXPR
1373 || rhs_code
== ADDR_EXPR
1374 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1376 tree rhs
= gimple_assign_rhs1 (stmt
);
1377 tree lhs
= gimple_assign_lhs (stmt
);
1378 tree inner_rhs
= get_base_address (rhs
);
1379 tree inner_lhs
= get_base_address (lhs
);
1380 bool rhs_free
= false;
1381 bool lhs_free
= false;
1388 /* Reads of parameter are expected to be free. */
1389 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1391 /* Match expressions of form &this->field. Those will most likely
1392 combine with something upstream after inlining. */
1393 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1395 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1396 if (TREE_CODE (op
) == PARM_DECL
)
1398 else if (TREE_CODE (op
) == MEM_REF
1399 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1404 /* When parameter is not SSA register because its address is taken
1405 and it is just copied into one, the statement will be completely
1406 free after inlining (we will copy propagate backward). */
1407 if (rhs_free
&& is_gimple_reg (lhs
))
1410 /* Reads of parameters passed by reference
1411 expected to be free (i.e. optimized out after inlining). */
1412 if (TREE_CODE (inner_rhs
) == MEM_REF
1413 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1416 /* Copying parameter passed by reference into gimple register is
1417 probably also going to copy propagate, but we can't be quite
1419 if (rhs_free
&& is_gimple_reg (lhs
))
1422 /* Writes to parameters, parameters passed by value and return value
1423 (either directly or passed via invisible reference) are free.
1425 TODO: We ought to handle testcase like
1426 struct a {int a,b;};
1434 This translate into:
1449 For that we either need to copy ipa-split logic detecting writes
1451 if (TREE_CODE (inner_lhs
) == PARM_DECL
1452 || TREE_CODE (inner_lhs
) == RESULT_DECL
1453 || (TREE_CODE (inner_lhs
) == MEM_REF
1454 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1456 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1457 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1458 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1460 0))) == RESULT_DECL
))))
1463 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1465 if (lhs_free
&& rhs_free
)
1474 /* Analyze EXPR if it represents a series of simple operations performed on
1475 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1476 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1477 Type of the parameter or load from an aggregate via the parameter is
1478 stored in *TYPE_P. Operations on the parameter are recorded to
1479 PARAM_OPS_P if it is not NULL. */
1482 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1483 gimple
*stmt
, tree expr
,
1484 int *index_p
, tree
*type_p
,
1485 struct agg_position_info
*aggpos
,
1486 expr_eval_ops
*param_ops_p
= NULL
)
1488 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1492 *param_ops_p
= NULL
;
1496 expr_eval_op eval_op
;
1498 unsigned cst_count
= 0;
1500 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1503 tree type
= TREE_TYPE (expr
);
1505 if (aggpos
->agg_contents
)
1507 /* Stop if containing bit-field. */
1508 if (TREE_CODE (expr
) == BIT_FIELD_REF
1509 || contains_bitfld_component_ref_p (expr
))
1517 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1519 stmt
= SSA_NAME_DEF_STMT (expr
);
1521 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
1523 int flags
= gimple_call_return_flags (call
);
1524 if (!(flags
& ERF_RETURNS_ARG
))
1526 int arg
= flags
& ERF_RETURN_ARG_MASK
;
1527 if (arg
>= (int)gimple_call_num_args (call
))
1529 expr
= gimple_call_arg (stmt
, arg
);
1533 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1536 switch (gimple_assign_rhs_class (stmt
))
1538 case GIMPLE_SINGLE_RHS
:
1539 expr
= gimple_assign_rhs1 (stmt
);
1542 case GIMPLE_UNARY_RHS
:
1546 case GIMPLE_BINARY_RHS
:
1550 case GIMPLE_TERNARY_RHS
:
1558 /* Stop if expression is too complex. */
1559 if (op_count
++ == op_limit
)
1564 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1565 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1566 eval_op
.val
[0] = NULL_TREE
;
1567 eval_op
.val
[1] = NULL_TREE
;
1571 for (unsigned i
= 0; i
< rhs_count
; i
++)
1573 tree op
= gimple_op (stmt
, i
+ 1);
1575 gcc_assert (op
&& !TYPE_P (op
));
1576 if (is_gimple_ip_invariant (op
))
1578 if (++cst_count
== rhs_count
)
1581 eval_op
.val
[cst_count
- 1] = op
;
1585 /* Found a non-constant operand, and record its index in rhs
1592 /* Found more than one non-constant operands. */
1598 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1601 /* Failed to decompose, free resource and return. */
1604 vec_free (*param_ops_p
);
1609 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1612 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1616 /* Avoid duplicates. */
1617 for (unsigned int i
= 0;
1618 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1621 summary
->builtin_constant_p_parms
.safe_push (parm
);
1624 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1625 predicates to the CFG edges. */
1628 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1629 class ipa_fn_summary
*summary
,
1630 class ipa_node_params
*params_summary
,
1635 struct agg_position_info aggpos
;
1636 enum tree_code code
, inverted_code
;
1641 expr_eval_ops param_ops
;
1643 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb
));
1646 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1648 op
= gimple_cond_lhs (last
);
1650 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1653 code
= gimple_cond_code (last
);
1654 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1656 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1658 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1659 ? code
: inverted_code
);
1660 /* invert_tree_comparison will return ERROR_MARK on FP
1661 comparisons that are not EQ/NE instead of returning proper
1662 unordered one. Be sure it is not confused with NON_CONSTANT.
1664 And if the edge's target is the final block of diamond CFG graph
1665 of this conditional statement, we do not need to compute
1666 predicate for the edge because the final block's predicate must
1667 be at least as that of the first block of the statement. */
1668 if (this_code
!= ERROR_MARK
1669 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1672 = add_condition (summary
, params_summary
, index
,
1673 param_type
, &aggpos
,
1674 this_code
, gimple_cond_rhs (last
), param_ops
);
1675 e
->aux
= edge_predicate_pool
.allocate ();
1676 *(ipa_predicate
*) e
->aux
= p
;
1679 vec_free (param_ops
);
1683 if (TREE_CODE (op
) != SSA_NAME
)
1686 if (builtin_constant_p (op))
1690 Here we can predicate nonconstant_code. We can't
1691 really handle constant_code since we have no predicate
1692 for this and also the constant code is not known to be
1693 optimized away when inliner doesn't see operand is constant.
1694 Other optimizers might think otherwise. */
1695 if (gimple_cond_code (last
) != NE_EXPR
1696 || !integer_zerop (gimple_cond_rhs (last
)))
1698 set_stmt
= SSA_NAME_DEF_STMT (op
);
1699 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1700 || gimple_call_num_args (set_stmt
) != 1)
1702 op2
= gimple_call_arg (set_stmt
, 0);
1703 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1706 add_builtin_constant_p_parm (summary
, index
);
1707 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1709 ipa_predicate p
= add_condition (summary
, params_summary
, index
,
1710 param_type
, &aggpos
,
1711 ipa_predicate::is_not_constant
, NULL_TREE
);
1712 e
->aux
= edge_predicate_pool
.allocate ();
1713 *(ipa_predicate
*) e
->aux
= p
;
1718 /* If BB ends by a switch we can turn into predicates, attach corresponding
1719 predicates to the CFG edges. */
1722 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1723 class ipa_fn_summary
*summary
,
1724 class ipa_node_params
*params_summary
,
1729 struct agg_position_info aggpos
;
1735 expr_eval_ops param_ops
;
1737 gswitch
*last
= safe_dyn_cast
<gswitch
*> (*gsi_last_bb (bb
));
1740 op
= gimple_switch_index (last
);
1741 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1745 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1746 tree type
= TREE_TYPE (op
);
1747 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1748 param_ipa_max_switch_predicate_bounds
);
1749 int bound_count
= 0;
1750 // This can safely be an integer range, as switches can only hold
1754 get_range_query (cfun
)->range_of_expr (vr
, op
);
1755 if (vr
.undefined_p ())
1756 vr
.set_varying (TREE_TYPE (op
));
1757 tree vr_min
, vr_max
;
1758 // TODO: This entire function could use a rewrite to use the irange
1759 // API, instead of trying to recreate its intersection/union logic.
1760 // Any use of get_legacy_range() is a serious code smell.
1761 value_range_kind vr_type
= get_legacy_range (vr
, vr_min
, vr_max
);
1762 wide_int vr_wmin
= wi::to_wide (vr_min
);
1763 wide_int vr_wmax
= wi::to_wide (vr_max
);
1765 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1767 e
->aux
= edge_predicate_pool
.allocate ();
1768 *(ipa_predicate
*) e
->aux
= false;
1771 e
= gimple_switch_edge (cfun
, last
, 0);
1772 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1773 default case if its target basic block is in convergence point of all
1774 switch cases, which can be determined by checking whether it
1775 post-dominates the switch statement. */
1776 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1777 bound_count
= INT_MAX
;
1779 n
= gimple_switch_num_labels (last
);
1780 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1782 tree cl
= gimple_switch_label (last
, case_idx
);
1783 tree min
= CASE_LOW (cl
);
1784 tree max
= CASE_HIGH (cl
);
1787 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1789 /* The case value might not have same type as switch expression,
1790 extend the value based on the expression type. */
1791 if (TREE_TYPE (min
) != type
)
1792 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1796 else if (TREE_TYPE (max
) != type
)
1797 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1799 /* The case's target basic block is in convergence point of all switch
1800 cases, its predicate should be at least as that of the switch
1802 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1804 else if (min
== max
)
1805 p
= add_condition (summary
, params_summary
, index
, param_type
,
1806 &aggpos
, EQ_EXPR
, min
, param_ops
);
1809 ipa_predicate p1
, p2
;
1810 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1811 &aggpos
, GE_EXPR
, min
, param_ops
);
1812 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1813 &aggpos
, LE_EXPR
, max
, param_ops
);
1816 *(ipa_predicate
*) e
->aux
1817 = p
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1819 /* If there are too many disjoint case ranges, predicate for default
1820 case might become too complicated. So add a limit here. */
1821 if (bound_count
> bound_limit
)
1824 bool new_range
= true;
1826 if (!ranges
.is_empty ())
1828 wide_int curr_wmin
= wi::to_wide (min
);
1829 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1831 /* Merge case ranges if they are continuous. */
1832 if (curr_wmin
== last_wmax
+ 1)
1834 else if (vr_type
== VR_ANTI_RANGE
)
1836 /* If two disjoint case ranges can be connected by anti-range
1837 of switch index, combine them to one range. */
1838 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1839 vr_type
= VR_UNDEFINED
;
1840 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1845 /* Create/extend a case range. And we count endpoints of range set,
1846 this number nearly equals to number of conditions that we will create
1847 for predicate of default case. */
1850 bound_count
+= (min
== max
) ? 1 : 2;
1851 ranges
.safe_push (std::make_pair (min
, max
));
1855 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1856 ranges
.last ().second
= max
;
1860 e
= gimple_switch_edge (cfun
, last
, 0);
1861 if (bound_count
> bound_limit
)
1863 *(ipa_predicate
*) e
->aux
= true;
1864 vec_free (param_ops
);
1868 ipa_predicate p_seg
= true;
1869 ipa_predicate p_all
= false;
1871 if (vr_type
!= VR_RANGE
)
1873 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1874 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1877 /* Construct predicate to represent default range set that is negation of
1878 all case ranges. Case range is classified as containing single/non-single
1879 values. Suppose a piece of case ranges in the following.
1881 [D1...D2] [S1] ... [Sn] [D3...D4]
1883 To represent default case's range sets between two non-single value
1884 case ranges (From D2 to D3), we construct predicate as:
1886 D2 < x < D3 && x != S1 && ... && x != Sn
1888 for (size_t i
= 0; i
< ranges
.length (); i
++)
1890 tree min
= ranges
[i
].first
;
1891 tree max
= ranges
[i
].second
;
1894 p_seg
&= add_condition (summary
, params_summary
, index
,
1895 param_type
, &aggpos
, NE_EXPR
,
1899 /* Do not create sub-predicate for range that is beyond low bound
1901 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1903 p_seg
&= add_condition (summary
, params_summary
, index
,
1904 param_type
, &aggpos
,
1905 LT_EXPR
, min
, param_ops
);
1906 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1909 /* Do not create sub-predicate for range that is beyond up bound
1911 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1917 p_seg
= add_condition (summary
, params_summary
, index
,
1918 param_type
, &aggpos
, GT_EXPR
,
1923 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1924 *(ipa_predicate
*) e
->aux
1925 = p_all
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1927 vec_free (param_ops
);
1931 /* For each BB in NODE attach to its AUX pointer predicate under
1932 which it is executable. */
1935 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1936 struct cgraph_node
*node
,
1937 class ipa_fn_summary
*summary
,
1938 class ipa_node_params
*params_summary
)
1940 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1944 FOR_EACH_BB_FN (bb
, my_function
)
1946 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1947 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1950 /* Entry block is always executable. */
1951 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1952 = edge_predicate_pool
.allocate ();
1953 *(ipa_predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1955 /* A simple dataflow propagation of predicates forward in the CFG.
1956 TODO: work in reverse postorder. */
1960 FOR_EACH_BB_FN (bb
, my_function
)
1962 ipa_predicate p
= false;
1965 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1969 ipa_predicate this_bb_predicate
1970 = *(ipa_predicate
*) e
->src
->aux
;
1972 this_bb_predicate
&= (*(ipa_predicate
*) e
->aux
);
1973 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1980 basic_block pdom_bb
;
1985 bb
->aux
= edge_predicate_pool
.allocate ();
1986 *((ipa_predicate
*) bb
->aux
) = p
;
1988 else if (p
!= *(ipa_predicate
*) bb
->aux
)
1990 /* This OR operation is needed to ensure monotonous data flow
1991 in the case we hit the limit on number of clauses and the
1992 and/or operations above give approximate answers. */
1993 p
= p
.or_with (summary
->conds
, *(ipa_predicate
*)bb
->aux
);
1994 if (p
!= *(ipa_predicate
*)bb
->aux
)
1997 *((ipa_predicate
*)bb
->aux
) = p
;
2001 /* For switch/if statement, we can OR-combine predicates of all
2002 its cases/branches to get predicate for basic block in their
2003 convergence point, but sometimes this will generate very
2004 complicated predicate. Actually, we can get simplified
2005 predicate in another way by using the fact that predicate
2006 for a basic block must also hold true for its post dominators.
2007 To be specific, basic block in convergence point of
2008 conditional statement should include predicate of the
2010 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
2011 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
2013 else if (!pdom_bb
->aux
)
2016 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
2017 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
2019 else if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
2021 p
= p
.or_with (summary
->conds
,
2022 *(ipa_predicate
*)pdom_bb
->aux
);
2023 if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
2026 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
2035 /* Return predicate specifying when the STMT might have result that is not
2036 a compile time constant. */
2038 static ipa_predicate
2039 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
2040 class ipa_fn_summary
*summary
,
2041 class ipa_node_params
*params_summary
,
2043 vec
<ipa_predicate
> nonconstant_names
)
2048 while (UNARY_CLASS_P (expr
))
2049 expr
= TREE_OPERAND (expr
, 0);
2051 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
2052 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2053 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
2054 ipa_predicate::changed
, NULL_TREE
);
2055 if (is_gimple_min_invariant (expr
))
2057 if (TREE_CODE (expr
) == SSA_NAME
)
2058 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
2059 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
2062 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2064 TREE_OPERAND (expr
, 0),
2070 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2072 TREE_OPERAND (expr
, 1),
2074 return p1
.or_with (summary
->conds
, p2
);
2076 else if (TREE_CODE (expr
) == COND_EXPR
)
2079 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2081 TREE_OPERAND (expr
, 0),
2087 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2089 TREE_OPERAND (expr
, 1),
2093 p1
= p1
.or_with (summary
->conds
, p2
);
2094 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2096 TREE_OPERAND (expr
, 2),
2098 return p2
.or_with (summary
->conds
, p1
);
2100 else if (TREE_CODE (expr
) == CALL_EXPR
)
2110 /* Return predicate specifying when the STMT might have result that is not
2111 a compile time constant. */
2113 static ipa_predicate
2114 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2115 class ipa_fn_summary
*summary
,
2116 class ipa_node_params
*params_summary
,
2118 vec
<ipa_predicate
> nonconstant_names
)
2120 ipa_predicate p
= true;
2123 tree param_type
= NULL_TREE
;
2124 ipa_predicate op_non_const
;
2127 struct agg_position_info aggpos
;
2129 /* What statements might be optimized away
2130 when their arguments are constant. */
2131 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2132 && gimple_code (stmt
) != GIMPLE_COND
2133 && gimple_code (stmt
) != GIMPLE_SWITCH
2134 && (gimple_code (stmt
) != GIMPLE_CALL
2135 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2138 /* Stores will stay anyway. */
2139 if (gimple_store_p (stmt
))
2142 is_load
= gimple_assign_load_p (stmt
);
2144 /* Loads can be optimized when the value is known. */
2147 tree op
= gimple_assign_rhs1 (stmt
);
2148 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2155 /* See if we understand all operands before we start
2156 adding conditionals. */
2157 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2159 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2160 /* For arguments we can build a condition. */
2161 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2163 if (TREE_CODE (use
) != SSA_NAME
)
2165 /* If we know when operand is constant,
2166 we still can say something useful. */
2167 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2174 add_condition (summary
, params_summary
,
2175 base_index
, param_type
, &aggpos
,
2176 ipa_predicate::changed
, NULL_TREE
);
2178 op_non_const
= false;
2179 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2181 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2184 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2186 if (index
!= base_index
)
2187 p
= add_condition (summary
, params_summary
, index
,
2188 TREE_TYPE (parm
), NULL
,
2189 ipa_predicate::changed
, NULL_TREE
);
2194 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2195 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2197 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2198 && gimple_op (stmt
, 0)
2199 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2200 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2202 return op_non_const
;
2205 struct record_modified_bb_info
2212 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2213 probability how often it changes between USE_BB.
2214 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2215 is in different loop nest, we can do better.
2216 This is all just estimate. In theory we look for minimal cut separating
2217 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2221 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2223 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2224 if (l
&& l
->header
->count
< init_bb
->count
)
2229 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2230 set except for info->stmt. */
2233 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2235 struct record_modified_bb_info
*info
=
2236 (struct record_modified_bb_info
*) data
;
2237 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2239 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2241 bitmap_set_bit (info
->bb_set
,
2242 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2243 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2245 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2246 gimple_bb (info
->stmt
))->index
);
2249 fprintf (dump_file
, " Param ");
2250 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2251 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2252 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2254 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2255 gimple_bb (info
->stmt
))->index
);
2256 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2261 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2262 will change since last invocation of STMT.
2264 Value 0 is reserved for compile time invariants.
2265 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2266 ought to be REG_BR_PROB_BASE / estimated_iters. */
2269 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2271 tree op
= gimple_call_arg (stmt
, i
);
2272 basic_block bb
= gimple_bb (stmt
);
2274 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2275 op
= TREE_OPERAND (op
, 0);
2277 tree base
= get_base_address (op
);
2279 /* Global invariants never change. */
2280 if (is_gimple_min_invariant (base
))
2283 /* We would have to do non-trivial analysis to really work out what
2284 is the probability of value to change (i.e. when init statement
2285 is in a sibling loop of the call).
2287 We do an conservative estimate: when call is executed N times more often
2288 than the statement defining value, we take the frequency 1/N. */
2289 if (TREE_CODE (base
) == SSA_NAME
)
2291 profile_count init_count
;
2293 if (!bb
->count
.nonzero_p ())
2294 return REG_BR_PROB_BASE
;
2296 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2297 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2299 init_count
= get_minimal_bb
2300 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2301 gimple_bb (stmt
))->count
;
2303 if (init_count
< bb
->count
)
2304 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2305 * REG_BR_PROB_BASE
).to_int (), 1);
2306 return REG_BR_PROB_BASE
;
2311 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2312 struct record_modified_bb_info info
;
2313 tree init
= ctor_for_folding (base
);
2315 if (init
!= error_mark_node
)
2317 if (!bb
->count
.nonzero_p () || fbi
->aa_walk_budget
== 0)
2318 return REG_BR_PROB_BASE
;
2321 fprintf (dump_file
, " Analyzing param change probability of ");
2322 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2323 fprintf (dump_file
, "\n");
2325 ao_ref_init (&refd
, op
);
2328 info
.bb_set
= BITMAP_ALLOC (NULL
);
2330 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2331 NULL
, NULL
, fbi
->aa_walk_budget
);
2333 fbi
->aa_walk_budget
-= walked
;
2334 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2337 fbi
->aa_walk_budget
= 0;
2341 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2343 fprintf (dump_file
, " Set in same BB as used.\n");
2345 BITMAP_FREE (info
.bb_set
);
2346 return REG_BR_PROB_BASE
;
2351 /* Lookup the most frequent update of the value and believe that
2352 it dominates all the other; precise analysis here is difficult. */
2353 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2354 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2357 fprintf (dump_file
, " Set with count ");
2358 max
.dump (dump_file
);
2359 fprintf (dump_file
, " and used with count ");
2360 bb
->count
.dump (dump_file
);
2361 fprintf (dump_file
, " freq %f\n",
2362 max
.to_sreal_scale (bb
->count
).to_double ());
2365 BITMAP_FREE (info
.bb_set
);
2366 if (max
< bb
->count
)
2367 return MAX ((max
.to_sreal_scale (bb
->count
)
2368 * REG_BR_PROB_BASE
).to_int (), 1);
2369 return REG_BR_PROB_BASE
;
2373 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2374 sub-graph and if the predicate the condition depends on is known. If so,
2375 return true and store the pointer the predicate in *P. */
2378 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2379 ipa_fn_summary
*summary
,
2380 class ipa_node_params
*params_summary
,
2383 vec
<ipa_predicate
> nonconstant_names
)
2387 basic_block first_bb
= NULL
;
2389 if (single_pred_p (bb
))
2395 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2397 if (single_succ_p (e
->src
))
2399 if (!single_pred_p (e
->src
))
2402 first_bb
= single_pred (e
->src
);
2403 else if (single_pred (e
->src
) != first_bb
)
2410 else if (e
->src
!= first_bb
)
2418 gcond
*stmt
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (first_bb
));
2420 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2423 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2424 gimple_cond_lhs (stmt
),
2432 /* Given a PHI statement in a function described by inline properties SUMMARY
2433 and *P being the predicate describing whether the selected PHI argument is
2434 known, store a predicate for the result of the PHI statement into
2435 NONCONSTANT_NAMES, if possible. */
2438 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2440 vec
<ipa_predicate
> nonconstant_names
)
/* OR together the nonconstant predicates of every non-invariant PHI
   argument; a min-invariant argument contributes nothing.  */
2444 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2446 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2447 if (!is_gimple_min_invariant (arg
))
2449 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2450 *p
= p
->or_with (summary
->conds
,
2451 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
/* Dump the combined predicate when detailed dumping is requested.  */
2457 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2459 fprintf (dump_file
, "\t\tphi predicate: ");
2460 p
->dump (dump_file
, summary
->conds
);
/* Record the predicate for the PHI result so statements that use it
   pick it up from NONCONSTANT_NAMES.  */
2462 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2465 /* For a typical usage of __builtin_expect (a<b, 1), we
2466 may introduce an extra relation stmt:
2467 With the builtin, we have
2470 t3 = __builtin_expect (t2, 1);
2473 Without the builtin, we have
2476 This affects the size/time estimation and may have
2477 an impact on the earlier inlining.
2478 Here find this pattern and fix it up later. */
2481 find_foldable_builtin_expect (basic_block bb
)
2483 gimple_stmt_iterator bsi
;
2485 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2487 gimple
*stmt
= gsi_stmt (bsi
);
2488 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2489 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2490 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2492 tree var
= gimple_call_lhs (stmt
);
2493 tree arg
= gimple_call_arg (stmt
, 0);
2494 use_operand_p use_p
;
2501 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2503 while (TREE_CODE (arg
) == SSA_NAME
)
2505 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2506 if (!is_gimple_assign (stmt_tmp
))
2508 switch (gimple_assign_rhs_code (stmt_tmp
))
2527 arg
= gimple_assign_rhs1 (stmt_tmp
);
2530 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2531 && gimple_code (use_stmt
) == GIMPLE_COND
)
2538 /* Return true when the basic blocks contains only clobbers followed by RESX.
2539 Such BBs are kept around to make removal of dead stores possible with
2540 presence of EH and will be optimized out by optimize_clobbers later in the
2543 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2544 that can be clobber only, too.  When it is false, the RESX is not necessary
2545 on the end of basic block. */
2548 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2550 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
/* Walk the block backwards: when NEED_EH the last statement must be a
   GIMPLE_RESX, and everything before it may only be debug statements,
   clobbers or labels.  */
2556 if (gsi_end_p (gsi
))
2558 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2562 else if (!single_succ_p (bb
))
2565 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2567 gimple
*stmt
= gsi_stmt (gsi
);
2568 if (is_gimple_debug (stmt
))
2570 if (gimple_clobber_p (stmt
))
2572 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2577 /* See if all predecessors are either throws or clobber only BBs. */
2578 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2579 if (!(e
->flags
& EDGE_EH
)
2580 && !clobber_only_eh_bb_p (e
->src
, false))
2586 /* Return true if STMT compute a floating point expression that may be affected
2587 by -ffast-math and similar flags. */
2590 fp_expression_p (gimple
*stmt
)
/* Scan both defs and uses; a single float-typed SSA operand is enough
   to classify the whole statement as a floating point expression.  */
2595 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2596 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2601 /* Return true if T references memory location that is local
2602 for the function (that means, dead after return) or read-only. */
2605 refs_local_or_readonly_memory_p (tree t
)
2607 /* Non-escaping memory is fine. */
2608 t
= get_base_address (t
);
/* A reference through a pointer is local/readonly iff the pointer
   itself points to local or readonly memory.  */
2609 if ((TREE_CODE (t
) == MEM_REF
2610 || TREE_CODE (t
) == TARGET_MEM_REF
))
2611 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2613 /* Automatic variables are fine. */
2615 && auto_var_in_fn_p (t
, current_function_decl
))
2618 /* Read-only variables are fine. */
2619 if (DECL_P (t
) && TREE_READONLY (t
))
2625 /* Return true if T is a pointer pointing to memory location that is local
2626 for the function (that means, dead after return) or read-only. */
2629 points_to_local_or_readonly_memory_p (tree t
)
2631 /* See if memory location is clearly invalid. */
2632 if (integer_zerop (t
))
/* A NULL pointer never points to valid memory, but only treat it as
   "local" when null pointer checks may be deleted.  */
2633 return flag_delete_null_pointer_checks
;
2634 if (TREE_CODE (t
) == SSA_NAME
)
2636 /* For IPA passes we can consider accesses to return slot local
2637 even if it is not local in the sense that memory is dead by
2638 the end of the function.
2639 The outer function will see a store in the call assignment
2640 and thus this will do right thing for all uses of this
2641 function in the current IPA passes (modref, pure/const discovery
2642 and inlining heuristics). */
2643 if (DECL_RESULT (current_function_decl
)
2644 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl
))
2645 && t
== ssa_default_def (cfun
, DECL_RESULT (current_function_decl
)))
2647 return !ptr_deref_may_alias_global_p (t
, false);
2649 if (TREE_CODE (t
) == ADDR_EXPR
)
2650 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2654 /* Return true if T is a pointer pointing to memory location that is possible
2655 sra candidate if all functions it is passed to are inlined. */
2658 points_to_possible_sra_candidate_p (tree t
)
/* Only an ADDR_EXPR can name a candidate directly; bail out on
   anything else.  */
2660 if (TREE_CODE (t
) != ADDR_EXPR
)
2663 t
= get_base_address (TREE_OPERAND (t
, 0));
2665 /* Automatic variables are fine. */
2667 && auto_var_in_fn_p (t
, current_function_decl
))
2672 /* Analyze function body for NODE.
2673 EARLY indicates run from early optimization pipeline. */
2676 analyze_function_body (struct cgraph_node
*node
, bool early
)
2678 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2679 /* Estimate static overhead for function prologue/epilogue and alignment. */
2680 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2681 /* Benefits are scaled by probability of elimination that is in range
2684 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2686 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2687 ipa_node_params
*params_summary
2688 = early
? NULL
: ipa_node_params_sum
->get (node
);
2689 ipa_predicate bb_predicate
;
2690 struct ipa_func_body_info fbi
;
2691 vec
<ipa_predicate
> nonconstant_names
= vNULL
;
2694 gimple
*fix_builtin_expect_stmt
;
2696 gcc_assert (my_function
&& my_function
->cfg
);
2697 gcc_assert (cfun
== my_function
);
2699 memset(&fbi
, 0, sizeof(fbi
));
2700 vec_free (info
->conds
);
2702 info
->size_time_table
.release ();
2703 info
->call_size_time_table
.release ();
2705 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2706 so we can produce proper inline hints.
2708 When optimizing and analyzing for early inliner, initialize node params
2709 so we can produce correct BB predicates. */
2711 if (opt_for_fn (node
->decl
, optimize
))
2713 calculate_dominance_info (CDI_DOMINATORS
);
2714 calculate_dominance_info (CDI_POST_DOMINATORS
);
2716 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2719 ipa_check_create_node_params ();
2720 ipa_initialize_node_params (node
);
2723 if (ipa_node_params_sum
)
2726 fbi
.info
= ipa_node_params_sum
->get (node
);
2727 fbi
.bb_infos
= vNULL
;
2728 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2729 fbi
.param_count
= count_formal_params (node
->decl
);
2730 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2732 nonconstant_names
.safe_grow_cleared
2733 (SSANAMES (my_function
)->length (), true);
2738 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2739 node
->dump_name ());
2741 /* When we run into maximal number of entries, we assign everything to the
2742 constant truth case. Be sure to have it in list. */
2743 bb_predicate
= true;
2744 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2746 bb_predicate
= ipa_predicate::not_inlined ();
2747 info
->account_size_time (opt_for_fn (node
->decl
,
2748 param_uninlined_function_insns
)
2749 * ipa_fn_summary::size_scale
,
2750 opt_for_fn (node
->decl
,
2751 param_uninlined_function_time
),
2755 /* Only look for target information for inlinable functions. */
2756 bool scan_for_target_info
=
2758 && targetm
.target_option
.need_ipa_fn_target_info (node
->decl
,
2762 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2763 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2764 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2765 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2766 for (n
= 0; n
< nblocks
; n
++)
2768 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2769 freq
= bb
->count
.to_sreal_scale (entry_count
);
2770 if (clobber_only_eh_bb_p (bb
))
2772 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2773 fprintf (dump_file
, "\n Ignoring BB %i;"
2774 " it will be optimized away by cleanup_clobbers\n",
2779 /* TODO: Obviously predicates can be propagated down across CFG. */
2783 bb_predicate
= *(ipa_predicate
*)bb
->aux
;
2785 bb_predicate
= false;
2788 bb_predicate
= true;
2790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2792 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2793 bb_predicate
.dump (dump_file
, info
->conds
);
2796 if (fbi
.info
&& nonconstant_names
.exists ())
2798 ipa_predicate phi_predicate
;
2799 bool first_phi
= true;
2801 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2805 && !phi_result_unknown_predicate (&fbi
, info
,
2812 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2814 fprintf (dump_file
, " ");
2815 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2817 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2822 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2824 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2825 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2827 gimple
*stmt
= gsi_stmt (bsi
);
2828 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2829 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2831 ipa_predicate will_be_nonconstant
;
2833 /* This relation stmt should be folded after we remove
2834 __builtin_expect call. Adjust the cost here. */
2835 if (stmt
== fix_builtin_expect_stmt
)
2841 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2843 fprintf (dump_file
, " ");
2844 print_gimple_stmt (dump_file
, stmt
, 0);
2845 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2846 freq
.to_double (), this_size
,
2850 if (is_gimple_call (stmt
)
2851 && !gimple_call_internal_p (stmt
))
2853 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2854 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2856 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2857 resolved as constant. We however don't want to optimize
2858 out the cgraph edges. */
2859 if (nonconstant_names
.exists ()
2860 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2861 && gimple_call_lhs (stmt
)
2862 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2864 ipa_predicate false_p
= false;
2865 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2868 if (ipa_node_params_sum
)
2870 int count
= gimple_call_num_args (stmt
);
2874 es
->param
.safe_grow_cleared (count
, true);
2875 for (i
= 0; i
< count
; i
++)
2877 int prob
= param_change_prob (&fbi
, stmt
, i
);
2878 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2879 es
->param
[i
].change_prob
= prob
;
2880 es
->param
[i
].points_to_local_or_readonly_memory
2881 = points_to_local_or_readonly_memory_p
2882 (gimple_call_arg (stmt
, i
));
2883 es
->param
[i
].points_to_possible_sra_candidate
2884 = points_to_possible_sra_candidate_p
2885 (gimple_call_arg (stmt
, i
));
2888 /* We cannot setup VLA parameters during inlining. */
2889 for (unsigned int i
= 0; i
< gimple_call_num_args (stmt
); ++i
)
2890 if (TREE_CODE (gimple_call_arg (stmt
, i
)) == WITH_SIZE_EXPR
)
2892 edge
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
2895 es
->call_stmt_size
= this_size
;
2896 es
->call_stmt_time
= this_time
;
2897 es
->loop_depth
= bb_loop_depth (bb
);
2898 edge_set_predicate (edge
, &bb_predicate
);
2899 if (edge
->speculative
)
2901 cgraph_edge
*indirect
2902 = edge
->speculative_call_indirect_edge ();
2903 ipa_call_summary
*es2
2904 = ipa_call_summaries
->get_create (indirect
);
2905 ipa_call_summaries
->duplicate (edge
, indirect
,
2908 /* Edge is the first direct call.
2909 create and duplicate call summaries for multiple
2910 speculative call targets. */
2911 for (cgraph_edge
*direct
2912 = edge
->next_speculative_call_target ();
2914 direct
= direct
->next_speculative_call_target ())
2916 ipa_call_summary
*es3
2917 = ipa_call_summaries
->get_create (direct
);
2918 ipa_call_summaries
->duplicate (edge
, direct
,
2924 /* TODO: When conditional jump or switch is known to be constant, but
2925 we did not translate it into the predicates, we really can account
2926 just maximum of the possible paths. */
2929 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2930 stmt
, nonconstant_names
);
2932 will_be_nonconstant
= true;
2933 if (this_time
|| this_size
)
2935 sreal final_time
= (sreal
)this_time
* freq
;
2936 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2937 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2939 "\t\t50%% will be eliminated by inlining\n");
2940 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2941 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2943 ipa_predicate p
= bb_predicate
& will_be_nonconstant
;
2944 int parm
= load_or_store_of_ptr_parameter (&fbi
, stmt
);
2945 ipa_predicate sra_predicate
= true;
2947 sra_predicate
&= add_condition (info
, params_summary
, parm
,
2948 ptr_type_node
, NULL
,
2949 ipa_predicate::not_sra_candidate
, NULL
, 0);
2951 /* We can ignore statement when we proved it is never going
2952 to happen, but we cannot do that for call statements
2953 because edges are accounted specially. */
2955 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2961 /* We account everything but the calls. Calls have their own
2962 size/time info attached to cgraph edges. This is necessary
2963 in order to make the cost disappear after inlining. */
2964 if (!is_gimple_call (stmt
))
2969 = bb_predicate
& ipa_predicate::not_inlined () & sra_predicate
;
2970 info
->account_size_time (this_size
* prob
,
2971 (final_time
* prob
) / 2, ip
,
2975 info
->account_size_time (this_size
* (2 - prob
),
2976 (final_time
* (2 - prob
) / 2),
2977 bb_predicate
& sra_predicate
,
2981 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2983 info
->fp_expressions
= true;
2985 fprintf (dump_file
, " fp_expression set\n");
2989 /* For target specific information, we want to scan all statements
2990 rather than those statements with non-zero weights, to avoid
2991 missing to scan something interesting for target information,
2992 such as: internal function calls. */
2993 if (scan_for_target_info
)
2994 scan_for_target_info
=
2995 targetm
.target_option
.update_ipa_fn_target_info
2996 (info
->target_info
, stmt
);
2998 /* Account cost of address calculations in the statements. */
2999 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
3001 for (tree op
= gimple_op (stmt
, i
);
3002 op
&& handled_component_p (op
);
3003 op
= TREE_OPERAND (op
, 0))
3004 if ((TREE_CODE (op
) == ARRAY_REF
3005 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
3006 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
3008 ipa_predicate p
= bb_predicate
;
3010 p
= p
& will_be_nonconstant_expr_predicate
3011 (&fbi
, info
, params_summary
,
3012 TREE_OPERAND (op
, 1),
3020 "\t\tAccounting address calculation.\n");
3021 info
->account_size_time (ipa_fn_summary::size_scale
,
3033 if (nonconstant_names
.exists () && !early
)
3035 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3036 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
3037 param_ipa_max_loop_predicates
);
3039 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3040 flow_loops_dump (dump_file
, NULL
, 0);
3042 for (auto loop
: loops_list (cfun
, 0))
3044 ipa_predicate loop_iterations
= true;
3048 class tree_niter_desc niter_desc
;
3049 if (!loop
->header
->aux
)
3052 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
3053 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
3055 bb_predicate
= *(ipa_predicate
*)loop
->header
->aux
;
3056 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
3057 FOR_EACH_VEC_ELT (exits
, j
, ex
)
3058 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
3059 && !is_gimple_min_invariant (niter_desc
.niter
))
3061 ipa_predicate will_be_nonconstant
3062 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3066 if (will_be_nonconstant
!= true)
3067 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3068 if (will_be_nonconstant
!= true
3069 && will_be_nonconstant
!= false)
3070 loop_iterations
&= will_be_nonconstant
;
3072 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
3073 phdr_freq
, max_loop_predicates
);
3076 /* To avoid quadratic behavior we analyze stride predicates only
3077 with respect to the containing loop. Thus we simply iterate
3078 over all defs in the outermost loop body. */
3079 for (class loop
*loop
= loops_for_fn (cfun
)->tree_root
->inner
;
3080 loop
!= NULL
; loop
= loop
->next
)
3082 ipa_predicate loop_stride
= true;
3083 basic_block
*body
= get_loop_body (loop
);
3084 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
3085 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
3086 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
3088 gimple_stmt_iterator gsi
;
3092 bb_predicate
= *(ipa_predicate
*)body
[i
]->aux
;
3093 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
3096 gimple
*stmt
= gsi_stmt (gsi
);
3098 if (!is_gimple_assign (stmt
))
3101 tree def
= gimple_assign_lhs (stmt
);
3102 if (TREE_CODE (def
) != SSA_NAME
)
3106 if (!simple_iv (loop_containing_stmt (stmt
),
3107 loop_containing_stmt (stmt
),
3109 || is_gimple_min_invariant (iv
.step
))
3112 ipa_predicate will_be_nonconstant
3113 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3117 if (will_be_nonconstant
!= true)
3118 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3119 if (will_be_nonconstant
!= true
3120 && will_be_nonconstant
!= false)
3121 loop_stride
= loop_stride
& will_be_nonconstant
;
3124 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
3125 phdr_freq
, max_loop_predicates
);
3130 FOR_ALL_BB_FN (bb
, my_function
)
3136 edge_predicate_pool
.remove ((ipa_predicate
*)bb
->aux
);
3138 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3141 edge_predicate_pool
.remove ((ipa_predicate
*)e
->aux
);
3145 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3146 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3148 ss
->self_size
= size
;
3149 nonconstant_names
.release ();
3150 ipa_release_body_info (&fbi
);
3151 if (opt_for_fn (node
->decl
, optimize
))
3154 loop_optimizer_finalize ();
3155 else if (!ipa_edge_args_sum
)
3156 ipa_free_all_node_params ();
3157 free_dominance_info (CDI_DOMINATORS
);
3158 free_dominance_info (CDI_POST_DOMINATORS
);
3162 fprintf (dump_file
, "\n");
3163 ipa_dump_fn_summary (dump_file
, node
);
3168 /* Compute function summary.
3169 EARLY is true when we compute parameters during early opts. */
3172 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3174 HOST_WIDE_INT self_stack_size
;
3175 struct cgraph_edge
*e
;
3177 gcc_assert (!node
->inlined_to
);
3179 if (!ipa_fn_summaries
)
3180 ipa_fn_summary_alloc ();
3182 /* Create a new ipa_fn_summary. */
3183 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3184 ipa_fn_summaries
->remove (node
);
3185 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3186 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3188 /* Estimate the stack size for the function if we're optimizing. */
3189 self_stack_size
= optimize
&& !node
->thunk
3190 ? estimated_stack_frame_size (node
) : 0;
3191 size_info
->estimated_self_stack_size
= self_stack_size
;
3192 info
->estimated_stack_size
= self_stack_size
;
3196 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3197 ipa_predicate t
= true;
3199 node
->can_change_signature
= false;
3200 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3201 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3202 info
->account_size_time (ipa_fn_summary::size_scale
3203 * opt_for_fn (node
->decl
,
3204 param_uninlined_function_thunk_insns
),
3205 opt_for_fn (node
->decl
,
3206 param_uninlined_function_thunk_time
), t
, t
);
3207 t
= ipa_predicate::not_inlined ();
3208 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3209 ipa_update_overall_fn_summary (node
);
3210 size_info
->self_size
= size_info
->size
;
3211 if (stdarg_p (TREE_TYPE (node
->decl
)))
3213 info
->inlinable
= false;
3214 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3217 info
->inlinable
= true;
3221 /* Even is_gimple_min_invariant rely on current_function_decl. */
3222 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3224 /* During IPA profile merging we may be called w/o virtual SSA form
3226 update_ssa (TODO_update_ssa_only_virtuals
);
3228 /* Can this function be inlined at all? */
3229 if (!opt_for_fn (node
->decl
, optimize
)
3230 && !lookup_attribute ("always_inline",
3231 DECL_ATTRIBUTES (node
->decl
)))
3232 info
->inlinable
= false;
3234 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3236 bool no_signature
= false;
3237 /* Type attributes can use parameter indices to describe them.
3238 Special case fn spec since we can safely preserve them in
3239 modref summaries. */
3240 for (tree list
= TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
));
3241 list
&& !no_signature
; list
= TREE_CHAIN (list
))
3242 if (!ipa_param_adjustments::type_attribute_allowed_p
3243 (get_attribute_name (list
)))
3247 fprintf (dump_file
, "No signature change:"
3248 " function type has unhandled attribute %s.\n",
3249 IDENTIFIER_POINTER (get_attribute_name (list
)));
3251 no_signature
= true;
3253 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
3254 parm
&& !no_signature
; parm
= DECL_CHAIN (parm
))
3255 if (variably_modified_type_p (TREE_TYPE (parm
), node
->decl
))
3259 fprintf (dump_file
, "No signature change:"
3260 " has parameter with variably modified type.\n");
3262 no_signature
= true;
3265 /* Likewise for #pragma omp declare simd functions or functions
3266 with simd attribute. */
3268 || lookup_attribute ("omp declare simd",
3269 DECL_ATTRIBUTES (node
->decl
)))
3270 node
->can_change_signature
= false;
3273 /* Otherwise, inlinable functions always can change signature. */
3274 if (info
->inlinable
)
3275 node
->can_change_signature
= true;
3278 /* Functions calling builtin_apply cannot change signature. */
3279 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3281 tree
cdecl = e
->callee
->decl
;
3282 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
,
3286 node
->can_change_signature
= !e
;
3289 analyze_function_body (node
, early
);
3293 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3294 size_info
->size
= size_info
->self_size
;
3295 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3297 /* Code above should compute exactly the same result as
3298 ipa_update_overall_fn_summary except for case when speculative
3299 edges are present since these are accounted to size but not
3300 self_size. Do not compare time since different order the roundoff
3301 errors result in slight changes. */
3302 ipa_update_overall_fn_summary (node
);
3305 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3308 gcc_assert (e
|| size_info
->size
== size_info
->self_size
);
3313 /* Compute parameters of functions used by inliner using
3314 current_function_decl. */
3317 compute_fn_summary_for_current (void)
/* The second argument (EARLY == true) marks this as running from the
   early optimization pipeline; see compute_fn_summary.  */
3319 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3323 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3324 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3325 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3328 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3329 int *size
, int *time
,
3330 ipa_call_arg_values
*avals
)
3333 struct cgraph_node
*callee
;
3334 class ipa_fn_summary
*isummary
;
3335 enum availability avail
;
3339 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3341 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3344 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3345 if (!target
|| speculative
)
3348 /* Account for difference in cost between indirect and direct calls. */
3349 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3350 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3351 gcc_checking_assert (*time
>= 0);
3352 gcc_checking_assert (*size
>= 0);
3354 callee
= cgraph_node::get (target
);
3355 if (!callee
|| !callee
->definition
)
3357 callee
= callee
->function_symbol (&avail
);
3358 if (avail
< AVAIL_AVAILABLE
)
3360 isummary
= ipa_fn_summaries
->get (callee
);
3361 if (isummary
== NULL
)
3364 return isummary
->inlinable
;
3367 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3368 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3369 devirtualized. AVALS, if non-NULL, describes the context of the call site
3370 as far as values of parameters are concerned. */
3373 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3374 sreal
*time
, ipa_call_arg_values
*avals
,
3377 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3378 int call_size
= es
->call_stmt_size
;
3379 int call_time
= es
->call_stmt_time
;
3382 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3383 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3384 *hints
|= INLINE_HINT_indirect_call
;
3385 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3388 *min_size
+= cur_size
;
3390 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3394 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3395 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3398 Helper for estimate_calls_size_and_time which does the same but
3399 (in most cases) faster. */
3402 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3403 int *min_size
, sreal
*time
,
3405 clause_t possible_truths
,
3406 ipa_call_arg_values
*avals
)
3408 struct cgraph_edge
*e
;
3409 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3411 if (!e
->inline_failed
)
3413 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3414 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3415 hints
, possible_truths
, avals
);
3419 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3421 /* Do not care about zero sized builtins. */
3422 if (!es
->call_stmt_size
)
3424 gcc_checking_assert (!es
->call_stmt_time
);
3428 || es
->predicate
->evaluate (possible_truths
))
3430 /* Predicates of calls shall not use NOT_CHANGED codes,
3431 so we do not need to compute probabilities. */
3432 estimate_edge_size_and_time (e
, size
,
3433 es
->predicate
? NULL
: min_size
,
3434 time
, avals
, hints
);
3437 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3439 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3441 || es
->predicate
->evaluate (possible_truths
))
3442 estimate_edge_size_and_time (e
, size
,
3443 es
->predicate
? NULL
: min_size
,
3444 time
, avals
, hints
);
3448 /* Populate sum->call_size_time_table for edges from NODE. */
3451 summarize_calls_size_and_time (struct cgraph_node
*node
,
3452 ipa_fn_summary
*sum
)
3454 struct cgraph_edge
*e
;
3455 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3457 if (!e
->inline_failed
)
3459 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3460 summarize_calls_size_and_time (e
->callee
, sum
);
3466 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3468 ipa_predicate pred
= true;
3469 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3472 pred
= *es
->predicate
;
3473 sum
->account_size_time (size
, time
, pred
, pred
, true);
3475 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3480 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3481 ipa_predicate pred
= true;
3482 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3485 pred
= *es
->predicate
;
3486 sum
->account_size_time (size
, time
, pred
, pred
, true);
3490 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3491 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3492 context of the call site. */
3495 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3496 int *min_size
, sreal
*time
,
3498 clause_t possible_truths
,
3499 ipa_call_arg_values
*avals
)
3501 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3502 bool use_table
= true;
3504 gcc_assert (node
->callees
|| node
->indirect_calls
);
3506 /* During early inlining we do not calculate info for very
3507 large functions and thus there is no need for producing
3509 if (!ipa_node_params_sum
)
3511 /* Do not calculate summaries for simple wrappers; it is waste
3513 else if (node
->callees
&& node
->indirect_calls
3514 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3516 /* If there is an indirect edge that may be optimized, we need
3517 to go the slow way. */
3518 else if (avals
&& hints
3519 && (avals
->m_known_vals
.length ()
3520 || avals
->m_known_contexts
.length ()
3521 || avals
->m_known_aggs
.length ()))
3523 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (node
);
3524 unsigned int nargs
= params_summary
3525 ? ipa_get_param_count (params_summary
) : 0;
3527 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3529 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3530 && (avals
->safe_sval_at (i
)
3531 || (ipa_argagg_value_list (avals
).value_for_index_p (i
))))
3533 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3534 && (avals
->m_known_contexts
.length () > i
3535 && !avals
->m_known_contexts
[i
].useless_p ()))
3540 /* Fast path is via the call size time table. */
3543 /* Build summary if it is absent. */
3544 if (!sum
->call_size_time_table
.length ())
3546 ipa_predicate true_pred
= true;
3547 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3548 summarize_calls_size_and_time (node
, sum
);
3551 int old_size
= *size
;
3552 sreal old_time
= time
? *time
: 0;
3555 *min_size
+= sum
->call_size_time_table
[0].size
;
3560 /* Walk the table and account sizes and times. */
3561 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3563 if (e
->exec_predicate
.evaluate (possible_truths
))
3570 /* Be careful and see if both methods agree. */
3571 if ((flag_checking
|| dump_file
)
3572 /* Do not try to sanity check when we know we lost some
3574 && sum
->call_size_time_table
.length ()
3575 < ipa_fn_summary::max_size_time_table_size
)
3577 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3578 possible_truths
, avals
);
3579 gcc_assert (*size
== old_size
);
3580 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3582 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3583 old_time
.to_double (),
3584 time
->to_double ());
3587 /* Slow path by walking all edges. */
3589 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3590 possible_truths
, avals
);
3593 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3594 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3597 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3598 clause_t nonspec_possible_truths
,
3599 vec
<inline_param_summary
>
3600 inline_param_summary
,
3601 ipa_auto_call_arg_values
*arg_values
)
3602 : m_node (node
), m_possible_truths (possible_truths
),
3603 m_nonspec_possible_truths (nonspec_possible_truths
),
3604 m_inline_param_summary (inline_param_summary
),
3605 m_avals (arg_values
)
3609 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3612 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3614 m_node
= ctx
.m_node
;
3615 m_possible_truths
= ctx
.m_possible_truths
;
3616 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3617 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3618 unsigned int nargs
= params_summary
3619 ? ipa_get_param_count (params_summary
) : 0;
3621 m_inline_param_summary
= vNULL
;
3622 /* Copy the info only if there is at least one useful entry. */
3623 if (ctx
.m_inline_param_summary
.exists ())
3625 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3627 for (unsigned int i
= 0; i
< n
; i
++)
3628 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3629 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3631 m_inline_param_summary
3632 = ctx
.m_inline_param_summary
.copy ();
3636 m_avals
.m_known_vals
= vNULL
;
3637 if (ctx
.m_avals
.m_known_vals
.exists ())
3639 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3641 for (unsigned int i
= 0; i
< n
; i
++)
3642 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3643 && ctx
.m_avals
.m_known_vals
[i
])
3645 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3650 m_avals
.m_known_contexts
= vNULL
;
3651 if (ctx
.m_avals
.m_known_contexts
.exists ())
3653 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3655 for (unsigned int i
= 0; i
< n
; i
++)
3656 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3657 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3659 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3664 m_avals
.m_known_aggs
= vNULL
;
3665 if (ctx
.m_avals
.m_known_aggs
.exists ())
3667 const ipa_argagg_value_list
avl (&ctx
.m_avals
);
3668 for (unsigned int i
= 0; i
< nargs
; i
++)
3669 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3670 && avl
.value_for_index_p (i
))
3672 m_avals
.m_known_aggs
= ctx
.m_avals
.m_known_aggs
.copy ();
3677 m_avals
.m_known_value_ranges
= vNULL
;
3680 /* Release memory used by known_vals/contexts/aggs vectors. and
3681 inline_param_summary. */
3684 ipa_cached_call_context::release ()
3686 /* See if context is initialized at first place. */
3689 m_avals
.m_known_aggs
.release ();
3690 m_avals
.m_known_vals
.release ();
3691 m_avals
.m_known_contexts
.release ();
3692 m_inline_param_summary
.release ();
3695 /* Return true if CTX describes the same call context as THIS. */
3698 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3700 if (m_node
!= ctx
.m_node
3701 || m_possible_truths
!= ctx
.m_possible_truths
3702 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3705 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3706 unsigned int nargs
= params_summary
3707 ? ipa_get_param_count (params_summary
) : 0;
3709 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3711 for (unsigned int i
= 0; i
< nargs
; i
++)
3713 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3715 if (i
>= m_inline_param_summary
.length ()
3716 || m_inline_param_summary
[i
].useless_p ())
3718 if (i
< ctx
.m_inline_param_summary
.length ()
3719 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3723 if (i
>= ctx
.m_inline_param_summary
.length ()
3724 || ctx
.m_inline_param_summary
[i
].useless_p ())
3726 if (i
< m_inline_param_summary
.length ()
3727 && !m_inline_param_summary
[i
].useless_p ())
3731 if (!m_inline_param_summary
[i
].equal_to
3732 (ctx
.m_inline_param_summary
[i
]))
3736 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3738 for (unsigned int i
= 0; i
< nargs
; i
++)
3740 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3742 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3744 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3745 && ctx
.m_avals
.m_known_vals
[i
])
3749 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3750 || !ctx
.m_avals
.m_known_vals
[i
])
3752 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3756 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3760 if (m_avals
.m_known_contexts
.exists ()
3761 || ctx
.m_avals
.m_known_contexts
.exists ())
3763 for (unsigned int i
= 0; i
< nargs
; i
++)
3765 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3767 if (i
>= m_avals
.m_known_contexts
.length ()
3768 || m_avals
.m_known_contexts
[i
].useless_p ())
3770 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3771 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3775 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3776 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3778 if (i
< m_avals
.m_known_contexts
.length ()
3779 && !m_avals
.m_known_contexts
[i
].useless_p ())
3783 if (!m_avals
.m_known_contexts
[i
].equal_to
3784 (ctx
.m_avals
.m_known_contexts
[i
]))
3788 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3790 unsigned i
= 0, j
= 0;
3791 while (i
< m_avals
.m_known_aggs
.length ()
3792 || j
< ctx
.m_avals
.m_known_aggs
.length ())
3794 if (i
>= m_avals
.m_known_aggs
.length ())
3796 int idx2
= ctx
.m_avals
.m_known_aggs
[j
].index
;
3797 if (ipa_is_param_used_by_indirect_call (params_summary
, idx2
))
3802 if (j
>= ctx
.m_avals
.m_known_aggs
.length ())
3804 int idx1
= m_avals
.m_known_aggs
[i
].index
;
3805 if (ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3811 int idx1
= m_avals
.m_known_aggs
[i
].index
;
3812 int idx2
= ctx
.m_avals
.m_known_aggs
[j
].index
;
3815 if (ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3822 if (ipa_is_param_used_by_indirect_call (params_summary
, idx2
))
3827 if (!ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3834 if ((m_avals
.m_known_aggs
[i
].unit_offset
3835 != ctx
.m_avals
.m_known_aggs
[j
].unit_offset
)
3836 || (m_avals
.m_known_aggs
[i
].by_ref
3837 != ctx
.m_avals
.m_known_aggs
[j
].by_ref
)
3838 || !operand_equal_p (m_avals
.m_known_aggs
[i
].value
,
3839 ctx
.m_avals
.m_known_aggs
[j
].value
))
3848 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3849 this context. Always compute size and min_size. Only compute time and
3850 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3854 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3855 bool est_times
, bool est_hints
)
3857 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3862 ipa_hints hints
= 0;
3863 sreal loops_with_known_iterations
= 0;
3864 sreal loops_with_known_strides
= 0;
3867 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3870 fprintf (dump_file
, " Estimating body: %s\n"
3871 " Known to be false: ", m_node
->dump_name ());
3873 for (i
= ipa_predicate::not_inlined_condition
;
3874 i
< (ipa_predicate::first_dynamic_condition
3875 + (int) vec_safe_length (info
->conds
)); i
++)
3876 if (!(m_possible_truths
& (1 << i
)))
3879 fprintf (dump_file
, ", ");
3881 dump_condition (dump_file
, info
->conds
, i
);
3885 if (m_node
->callees
|| m_node
->indirect_calls
)
3886 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3887 est_times
? &time
: NULL
,
3888 est_hints
? &hints
: NULL
, m_possible_truths
,
3891 sreal nonspecialized_time
= time
;
3893 min_size
+= info
->size_time_table
[0].size
;
3894 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3896 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3898 /* Because predicates are conservative, it can happen that nonconst is 1
3902 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3904 gcc_checking_assert (e
->time
>= 0);
3905 gcc_checking_assert (time
>= 0);
3907 /* We compute specialized size only because size of nonspecialized
3908 copy is context independent.
3910 The difference between nonspecialized execution and specialized is
3911 that nonspecialized is not going to have optimized out computations
3912 known to be constant in a specialized setting. */
3917 nonspecialized_time
+= e
->time
;
3920 else if (!m_inline_param_summary
.exists ())
3927 int prob
= e
->nonconst_predicate
.probability
3928 (info
->conds
, m_possible_truths
,
3929 m_inline_param_summary
);
3930 gcc_checking_assert (prob
>= 0);
3931 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3932 if (prob
== REG_BR_PROB_BASE
)
3935 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3937 gcc_checking_assert (time
>= 0);
3940 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3941 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3942 gcc_checking_assert (min_size
>= 0);
3943 gcc_checking_assert (size
>= 0);
3944 gcc_checking_assert (time
>= 0);
3945 /* nonspecialized_time should be always bigger than specialized time.
3946 Roundoff issues however may get into the way. */
3947 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3949 /* Roundoff issues may make specialized time bigger than nonspecialized
3950 time. We do not really want that to happen because some heuristics
3951 may get confused by seeing negative speedups. */
3952 if (time
> nonspecialized_time
)
3953 time
= nonspecialized_time
;
3958 hints
|= INLINE_HINT_in_scc
;
3959 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3960 hints
|= INLINE_HINT_declared_inline
;
3961 if (info
->builtin_constant_p_parms
.length ()
3962 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3963 hints
|= INLINE_HINT_builtin_constant_p
;
3965 ipa_freqcounting_predicate
*fcp
;
3966 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3967 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3969 hints
|= INLINE_HINT_loop_iterations
;
3970 loops_with_known_iterations
+= fcp
->freq
;
3972 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3974 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3975 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3977 hints
|= INLINE_HINT_loop_stride
;
3978 loops_with_known_strides
+= fcp
->freq
;
3980 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3983 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3984 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3986 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3988 fprintf (dump_file
, "\n size:%i", (int) size
);
3990 fprintf (dump_file
, " time:%f nonspec time:%f",
3991 time
.to_double (), nonspecialized_time
.to_double ());
3993 fprintf (dump_file
, " loops with known iterations:%f "
3994 "known strides:%f", loops_with_known_iterations
.to_double (),
3995 loops_with_known_strides
.to_double ());
3996 fprintf (dump_file
, "\n");
4000 estimates
->time
= time
;
4001 estimates
->nonspecialized_time
= nonspecialized_time
;
4003 estimates
->size
= size
;
4004 estimates
->min_size
= min_size
;
4006 estimates
->hints
= hints
;
4011 /* Estimate size and time needed to execute callee of EDGE assuming that
4012 parameters known to be constant at caller of EDGE are propagated.
4013 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
4014 and types for parameters. */
4017 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
4018 ipa_auto_call_arg_values
*avals
,
4019 ipa_call_estimates
*estimates
)
4021 clause_t clause
, nonspec_clause
;
4023 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
4024 &nonspec_clause
, NULL
);
4025 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
4026 ctx
.estimate_size_and_time (estimates
);
4029 /* Return stack frame offset where frame of NODE is supposed to start inside
4030 of the function it is inlined to.
4031 Return 0 for functions that are not inlined. */
4034 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
4036 HOST_WIDE_INT offset
= 0;
4037 if (!node
->inlined_to
)
4039 node
= node
->callers
->caller
;
4042 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
4043 if (!node
->inlined_to
)
4045 node
= node
->callers
->caller
;
4050 /* Update summary information of inline clones after inlining.
4051 Compute peak stack usage. */
4054 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
4056 struct cgraph_edge
*e
;
4058 ipa_propagate_frequency (node
);
4059 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4061 if (!e
->inline_failed
)
4062 inline_update_callee_summaries (e
->callee
, depth
);
4064 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
4066 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4067 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
4070 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
4071 INLINED_EDGE has been inlined.
4073 When function A is inlined in B and A calls C with parameter that
4074 changes with probability PROB1 and C is known to be passthrough
4075 of argument if B that change with probability PROB2, the probability
4076 of change is now PROB1*PROB2. */
4079 remap_edge_params (struct cgraph_edge
*inlined_edge
,
4080 struct cgraph_edge
*edge
)
4082 if (ipa_node_params_sum
)
4085 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4088 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4089 class ipa_call_summary
*inlined_es
4090 = ipa_call_summaries
->get (inlined_edge
);
4092 if (es
->param
.length () == 0)
4095 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
4097 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4098 if (jfunc
->type
== IPA_JF_PASS_THROUGH
4099 || jfunc
->type
== IPA_JF_ANCESTOR
)
4101 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
4102 ? ipa_get_jf_pass_through_formal_id (jfunc
)
4103 : ipa_get_jf_ancestor_formal_id (jfunc
);
4104 if (id
< (int) inlined_es
->param
.length ())
4106 int prob1
= es
->param
[i
].change_prob
;
4107 int prob2
= inlined_es
->param
[id
].change_prob
;
4108 int prob
= combine_probabilities (prob1
, prob2
);
4110 if (prob1
&& prob2
&& !prob
)
4113 es
->param
[i
].change_prob
= prob
;
4116 ->param
[id
].points_to_local_or_readonly_memory
)
4117 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4119 ->param
[id
].points_to_possible_sra_candidate
)
4120 es
->param
[i
].points_to_possible_sra_candidate
= true;
4122 if (!es
->param
[i
].points_to_local_or_readonly_memory
4123 && jfunc
->type
== IPA_JF_CONST
4124 && points_to_local_or_readonly_memory_p
4125 (ipa_get_jf_constant (jfunc
)))
4126 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4132 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
4134 Remap predicates of callees of NODE. Rest of arguments match
4137 Also update change probabilities. */
4140 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
4141 struct cgraph_node
*node
,
4142 class ipa_fn_summary
*info
,
4143 class ipa_node_params
*params_summary
,
4144 class ipa_fn_summary
*callee_info
,
4145 const vec
<int> &operand_map
,
4146 const vec
<HOST_WIDE_INT
> &offset_map
,
4147 clause_t possible_truths
,
4148 ipa_predicate
*toplev_predicate
)
4150 struct cgraph_edge
*e
, *next
;
4151 for (e
= node
->callees
; e
; e
= next
)
4154 next
= e
->next_callee
;
4156 if (e
->inline_failed
)
4158 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4159 remap_edge_params (inlined_edge
, e
);
4163 p
= es
->predicate
->remap_after_inlining
4164 (info
, params_summary
,
4165 callee_info
, operand_map
,
4166 offset_map
, possible_truths
,
4168 edge_set_predicate (e
, &p
);
4171 edge_set_predicate (e
, toplev_predicate
);
4174 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
4175 params_summary
, callee_info
,
4176 operand_map
, offset_map
, possible_truths
,
4179 for (e
= node
->indirect_calls
; e
; e
= next
)
4181 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4183 next
= e
->next_callee
;
4185 remap_edge_params (inlined_edge
, e
);
4188 p
= es
->predicate
->remap_after_inlining
4189 (info
, params_summary
,
4190 callee_info
, operand_map
, offset_map
,
4191 possible_truths
, *toplev_predicate
);
4192 edge_set_predicate (e
, &p
);
4195 edge_set_predicate (e
, toplev_predicate
);
4199 /* Run remap_after_inlining on each predicate in V. */
4202 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4203 class ipa_node_params
*params_summary
,
4204 class ipa_fn_summary
*callee_info
,
4205 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4206 const vec
<int> &operand_map
,
4207 const vec
<HOST_WIDE_INT
> &offset_map
,
4208 clause_t possible_truths
,
4209 ipa_predicate
*toplev_predicate
)
4212 ipa_freqcounting_predicate
*fcp
;
4213 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4216 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4217 callee_info
, operand_map
,
4218 offset_map
, possible_truths
,
4220 if (p
!= false && p
!= true)
4221 *fcp
->predicate
&= p
;
4225 /* We inlined EDGE. Update summary of the function we inlined into. */
4228 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4230 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4231 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4232 ? edge
->caller
->inlined_to
: edge
->caller
);
4233 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4234 clause_t clause
= 0; /* not_inline is known to be false. */
4236 auto_vec
<int, 8> operand_map
;
4237 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4239 ipa_predicate toplev_predicate
;
4240 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4241 ipa_node_params
*params_summary
= (ipa_node_params_sum
4242 ? ipa_node_params_sum
->get (to
) : NULL
);
4245 toplev_predicate
= *es
->predicate
;
4247 toplev_predicate
= true;
4249 info
->fp_expressions
|= callee_info
->fp_expressions
;
4250 info
->target_info
|= callee_info
->target_info
;
4252 if (callee_info
->conds
)
4254 ipa_auto_call_arg_values avals
;
4255 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4257 if (ipa_node_params_sum
&& callee_info
->conds
)
4259 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4260 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4265 operand_map
.safe_grow_cleared (count
, true);
4266 offset_map
.safe_grow_cleared (count
, true);
4268 for (i
= 0; i
< count
; i
++)
4270 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4273 /* TODO: handle non-NOPs when merging. */
4274 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4276 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4277 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4278 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4281 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4283 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4284 if (offset
>= 0 && offset
< INT_MAX
)
4286 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4287 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4289 offset_map
[i
] = offset
;
4292 operand_map
[i
] = map
;
4293 gcc_assert (map
< ipa_get_param_count (params_summary
));
4297 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4298 if (ip
< count
&& operand_map
[ip
] >= 0)
4299 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4301 sreal freq
= edge
->sreal_frequency ();
4302 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4305 p
= e
->exec_predicate
.remap_after_inlining
4306 (info
, params_summary
,
4307 callee_info
, operand_map
,
4310 ipa_predicate nonconstp
;
4311 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4312 (info
, params_summary
,
4313 callee_info
, operand_map
,
4316 if (p
!= false && nonconstp
!= false)
4318 sreal add_time
= ((sreal
)e
->time
* freq
);
4319 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4321 if (prob
!= REG_BR_PROB_BASE
)
4322 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4323 if (prob
!= REG_BR_PROB_BASE
4324 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4326 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4327 (double) prob
/ REG_BR_PROB_BASE
);
4329 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4332 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4333 callee_info
, operand_map
,
4334 offset_map
, clause
, &toplev_predicate
);
4335 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4336 info
->loop_iterations
, operand_map
,
4337 offset_map
, clause
, &toplev_predicate
);
4338 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4339 info
->loop_strides
, operand_map
,
4340 offset_map
, clause
, &toplev_predicate
);
4342 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4343 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4345 if (info
->estimated_stack_size
< peak
)
4346 info
->estimated_stack_size
= peak
;
4348 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4349 if (info
->call_size_time_table
.length ())
4352 sreal edge_time
= 0;
4354 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4355 /* Unaccount size and time of the optimized out call. */
4356 info
->account_size_time (-edge_size
, -edge_time
,
4357 es
->predicate
? *es
->predicate
: true,
4358 es
->predicate
? *es
->predicate
: true,
4360 /* Account new calls. */
4361 summarize_calls_size_and_time (edge
->callee
, info
);
4364 /* Free summaries that are not maintained for inline clones/edges. */
4365 ipa_call_summaries
->remove (edge
);
4366 ipa_fn_summaries
->remove (edge
->callee
);
4367 ipa_remove_from_growth_caches (edge
);
4370 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4371 overall size and time. Recompute it.
4372 If RESET is true also recompute call_time_size_table. */
4375 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4377 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4378 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4382 size_info
->size
= 0;
4384 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4386 size_info
->size
+= e
->size
;
4387 info
->time
+= e
->time
;
4389 info
->min_size
= info
->size_time_table
[0].size
;
4391 info
->call_size_time_table
.release ();
4392 if (node
->callees
|| node
->indirect_calls
)
4393 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4395 ~(clause_t
) (1 << ipa_predicate::false_condition
),
4397 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4398 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4402 /* This function performs intraprocedural analysis in NODE that is required to
4403 inline indirect calls. */
4406 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4408 ipa_analyze_node (node
);
4409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4411 ipa_print_node_params (dump_file
, node
);
4412 ipa_print_node_jump_functions (dump_file
, node
);
4417 /* Note function body size. */
4420 inline_analyze_function (struct cgraph_node
*node
)
4422 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4425 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4426 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4427 inline_indirect_intraprocedural_analysis (node
);
4428 compute_fn_summary (node
, false);
4431 struct cgraph_edge
*e
;
4432 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4433 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4434 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4435 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4442 /* Called when new function is inserted to callgraph late. */
4445 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4447 inline_analyze_function (node
);
4450 /* Note function body size. */
4453 ipa_fn_summary_generate (void)
4455 struct cgraph_node
*node
;
4457 FOR_EACH_DEFINED_FUNCTION (node
)
4458 if (DECL_STRUCT_FUNCTION (node
->decl
))
4459 node
->versionable
= tree_versionable_function_p (node
->decl
);
4461 ipa_fn_summary_alloc ();
4463 ipa_fn_summaries
->enable_insertion_hook ();
4465 ipa_register_cgraph_hooks ();
4467 FOR_EACH_DEFINED_FUNCTION (node
)
4469 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4470 || opt_for_fn (node
->decl
, optimize
)))
4471 inline_analyze_function (node
);
4475 /* Write inline summary for edge E to OB. */
4478 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4481 class ipa_call_summary
*es
= prevails
4482 ? ipa_call_summaries
->get_create (e
) : NULL
;
4486 int size
= streamer_read_uhwi (ib
);
4487 int time
= streamer_read_uhwi (ib
);
4488 int depth
= streamer_read_uhwi (ib
);
4492 es
->call_stmt_size
= size
;
4493 es
->call_stmt_time
= time
;
4494 es
->loop_depth
= depth
;
4497 bitpack_d bp
= streamer_read_bitpack (ib
);
4499 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4501 bp_unpack_value (&bp
, 1);
4505 edge_set_predicate (e
, &p
);
4506 length
= streamer_read_uhwi (ib
);
4508 && (e
->possibly_call_in_translation_unit_p ()
4509 /* Also stream in jump functions to builtins in hope that they
4510 will get fnspecs. */
4511 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4513 es
->param
.safe_grow_cleared (length
, true);
4514 for (i
= 0; i
< length
; i
++)
4516 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4517 bitpack_d bp
= streamer_read_bitpack (ib
);
4518 es
->param
[i
].points_to_local_or_readonly_memory
4519 = bp_unpack_value (&bp
, 1);
4520 es
->param
[i
].points_to_possible_sra_candidate
4521 = bp_unpack_value (&bp
, 1);
4526 for (i
= 0; i
< length
; i
++)
4528 streamer_read_uhwi (ib
);
4529 streamer_read_uhwi (ib
);
4535 /* Stream in inline summaries from the section. */
4538 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4541 const struct lto_function_header
*header
=
4542 (const struct lto_function_header
*) data
;
4543 const int cfg_offset
= sizeof (struct lto_function_header
);
4544 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4545 const int string_offset
= main_offset
+ header
->main_size
;
4546 class data_in
*data_in
;
4547 unsigned int i
, count2
, j
;
4548 unsigned int f_count
;
4550 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4551 file_data
->mode_table
);
4554 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4555 header
->string_size
, vNULL
);
4556 f_count
= streamer_read_uhwi (&ib
);
4557 for (i
= 0; i
< f_count
; i
++)
4560 struct cgraph_node
*node
;
4561 class ipa_fn_summary
*info
;
4562 class ipa_node_params
*params_summary
;
4563 class ipa_size_summary
*size_info
;
4564 lto_symtab_encoder_t encoder
;
4565 struct bitpack_d bp
;
4566 struct cgraph_edge
*e
;
4569 index
= streamer_read_uhwi (&ib
);
4570 encoder
= file_data
->symtab_node_encoder
;
4571 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4573 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4574 params_summary
= node
->prevailing_p ()
4575 ? ipa_node_params_sum
->get (node
) : NULL
;
4576 size_info
= node
->prevailing_p ()
4577 ? ipa_size_summaries
->get_create (node
) : NULL
;
4579 int stack_size
= streamer_read_uhwi (&ib
);
4580 int size
= streamer_read_uhwi (&ib
);
4581 sreal time
= sreal::stream_in (&ib
);
4585 info
->estimated_stack_size
4586 = size_info
->estimated_self_stack_size
= stack_size
;
4587 size_info
->size
= size_info
->self_size
= size
;
4591 bp
= streamer_read_bitpack (&ib
);
4594 info
->inlinable
= bp_unpack_value (&bp
, 1);
4595 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4596 if (!lto_stream_offload_p
)
4597 info
->target_info
= streamer_read_uhwi (&ib
);
4601 bp_unpack_value (&bp
, 1);
4602 bp_unpack_value (&bp
, 1);
4603 if (!lto_stream_offload_p
)
4604 streamer_read_uhwi (&ib
);
4607 count2
= streamer_read_uhwi (&ib
);
4608 gcc_assert (!info
|| !info
->conds
);
4610 vec_safe_reserve_exact (info
->conds
, count2
);
4611 for (j
= 0; j
< count2
; j
++)
4614 unsigned int k
, count3
;
4615 c
.operand_num
= streamer_read_uhwi (&ib
);
4616 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4617 c
.type
= stream_read_tree (&ib
, data_in
);
4618 c
.val
= stream_read_tree (&ib
, data_in
);
4619 bp
= streamer_read_bitpack (&ib
);
4620 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4621 c
.by_ref
= bp_unpack_value (&bp
, 1);
4623 c
.offset
= streamer_read_uhwi (&ib
);
4624 count3
= streamer_read_uhwi (&ib
);
4627 vec_safe_reserve_exact (c
.param_ops
, count3
);
4629 ipa_set_param_used_by_ipa_predicates
4630 (params_summary
, c
.operand_num
, true);
4631 for (k
= 0; k
< count3
; k
++)
4633 struct expr_eval_op op
;
4634 enum gimple_rhs_class rhs_class
;
4635 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4636 op
.type
= stream_read_tree (&ib
, data_in
);
4637 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4639 case GIMPLE_UNARY_RHS
:
4641 op
.val
[0] = NULL_TREE
;
4642 op
.val
[1] = NULL_TREE
;
4645 case GIMPLE_BINARY_RHS
:
4646 case GIMPLE_TERNARY_RHS
:
4647 bp
= streamer_read_bitpack (&ib
);
4648 op
.index
= bp_unpack_value (&bp
, 2);
4649 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4650 if (rhs_class
== GIMPLE_BINARY_RHS
)
4651 op
.val
[1] = NULL_TREE
;
4653 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4657 fatal_error (UNKNOWN_LOCATION
,
4658 "invalid fnsummary in LTO stream");
4661 c
.param_ops
->quick_push (op
);
4664 info
->conds
->quick_push (c
);
4666 count2
= streamer_read_uhwi (&ib
);
4667 gcc_assert (!info
|| !info
->size_time_table
.length ());
4669 info
->size_time_table
.reserve_exact (count2
);
4670 for (j
= 0; j
< count2
; j
++)
4672 class size_time_entry e
;
4674 e
.size
= streamer_read_uhwi (&ib
);
4675 e
.time
= sreal::stream_in (&ib
);
4676 e
.exec_predicate
.stream_in (&ib
);
4677 e
.nonconst_predicate
.stream_in (&ib
);
4680 info
->size_time_table
.quick_push (e
);
4683 count2
= streamer_read_uhwi (&ib
);
4684 for (j
= 0; j
< count2
; j
++)
4687 sreal fcp_freq
= sreal::stream_in (&ib
);
4690 ipa_freqcounting_predicate fcp
;
4691 fcp
.predicate
= NULL
;
4692 set_hint_predicate (&fcp
.predicate
, p
);
4693 fcp
.freq
= fcp_freq
;
4694 vec_safe_push (info
->loop_iterations
, fcp
);
4697 count2
= streamer_read_uhwi (&ib
);
4698 for (j
= 0; j
< count2
; j
++)
4701 sreal fcp_freq
= sreal::stream_in (&ib
);
4704 ipa_freqcounting_predicate fcp
;
4705 fcp
.predicate
= NULL
;
4706 set_hint_predicate (&fcp
.predicate
, p
);
4707 fcp
.freq
= fcp_freq
;
4708 vec_safe_push (info
->loop_strides
, fcp
);
4711 count2
= streamer_read_uhwi (&ib
);
4713 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4714 for (j
= 0; j
< count2
; j
++)
4716 int parm
= streamer_read_uhwi (&ib
);
4718 info
->builtin_constant_p_parms
.quick_push (parm
);
4720 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4721 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4722 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4723 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4726 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4728 lto_data_in_delete (data_in
);
4732 /* Read inline summary. Jump functions are shared among ipa-cp
4733 and inliner, so when ipa-cp is active, we don't need to write them
4737 ipa_fn_summary_read (void)
4739 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4740 struct lto_file_decl_data
*file_data
;
4743 ipa_prop_read_jump_functions ();
4744 ipa_fn_summary_alloc ();
4746 while ((file_data
= file_data_vec
[j
++]))
4750 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4753 inline_read_section (file_data
, data
, len
);
4755 /* Fatal error here. We do not want to support compiling ltrans units
4756 with different version of compiler or different flags than the WPA
4757 unit, so this should never happen. */
4758 fatal_error (input_location
,
4759 "ipa inline summary is missing in input file");
4761 ipa_register_cgraph_hooks ();
4763 gcc_assert (ipa_fn_summaries
);
4764 ipa_fn_summaries
->enable_insertion_hook ();
4768 /* Write inline summary for edge E to OB. */
4771 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4773 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4776 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4777 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4778 streamer_write_uhwi (ob
, es
->loop_depth
);
4780 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4781 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4782 streamer_write_bitpack (&bp
);
4785 es
->predicate
->stream_out (ob
);
4787 streamer_write_uhwi (ob
, 0);
4788 streamer_write_uhwi (ob
, es
->param
.length ());
4789 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4791 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4792 bp
= bitpack_create (ob
->main_stream
);
4793 bp_pack_value (&bp
, es
->param
[i
].points_to_local_or_readonly_memory
, 1);
4794 bp_pack_value (&bp
, es
->param
[i
].points_to_possible_sra_candidate
, 1);
4795 streamer_write_bitpack (&bp
);
4800 /* Write inline summary for node in SET.
4801 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4802 active, we don't need to write them twice. */
4805 ipa_fn_summary_write (void)
4807 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4808 lto_symtab_encoder_iterator lsei
;
4809 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4810 unsigned int count
= 0;
4812 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4813 lsei_next_function_in_partition (&lsei
))
4815 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4816 if (cnode
->definition
&& !cnode
->alias
)
4819 streamer_write_uhwi (ob
, count
);
4821 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4822 lsei_next_function_in_partition (&lsei
))
4824 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4825 if (cnode
->definition
&& !cnode
->alias
)
4827 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4828 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4829 struct bitpack_d bp
;
4830 struct cgraph_edge
*edge
;
4833 struct condition
*c
;
4835 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4836 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4837 streamer_write_hwi (ob
, size_info
->self_size
);
4838 info
->time
.stream_out (ob
);
4839 bp
= bitpack_create (ob
->main_stream
);
4840 bp_pack_value (&bp
, info
->inlinable
, 1);
4841 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4842 streamer_write_bitpack (&bp
);
4843 if (!lto_stream_offload_p
)
4844 streamer_write_uhwi (ob
, info
->target_info
);
4845 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4846 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4849 struct expr_eval_op
*op
;
4851 streamer_write_uhwi (ob
, c
->operand_num
);
4852 streamer_write_uhwi (ob
, c
->code
);
4853 stream_write_tree (ob
, c
->type
, true);
4854 stream_write_tree (ob
, c
->val
, true);
4855 bp
= bitpack_create (ob
->main_stream
);
4856 bp_pack_value (&bp
, c
->agg_contents
, 1);
4857 bp_pack_value (&bp
, c
->by_ref
, 1);
4858 streamer_write_bitpack (&bp
);
4859 if (c
->agg_contents
)
4860 streamer_write_uhwi (ob
, c
->offset
);
4861 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4862 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4864 streamer_write_uhwi (ob
, op
->code
);
4865 stream_write_tree (ob
, op
->type
, true);
4868 bp
= bitpack_create (ob
->main_stream
);
4869 bp_pack_value (&bp
, op
->index
, 2);
4870 streamer_write_bitpack (&bp
);
4871 stream_write_tree (ob
, op
->val
[0], true);
4873 stream_write_tree (ob
, op
->val
[1], true);
4877 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4878 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4880 streamer_write_uhwi (ob
, e
->size
);
4881 e
->time
.stream_out (ob
);
4882 e
->exec_predicate
.stream_out (ob
);
4883 e
->nonconst_predicate
.stream_out (ob
);
4885 ipa_freqcounting_predicate
*fcp
;
4886 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4887 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4889 fcp
->predicate
->stream_out (ob
);
4890 fcp
->freq
.stream_out (ob
);
4892 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4893 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4895 fcp
->predicate
->stream_out (ob
);
4896 fcp
->freq
.stream_out (ob
);
4898 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4900 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4902 streamer_write_uhwi (ob
, ip
);
4903 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4904 write_ipa_call_summary (ob
, edge
);
4905 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4906 write_ipa_call_summary (ob
, edge
);
4909 streamer_write_char_stream (ob
->main_stream
, 0);
4910 produce_asm (ob
, NULL
);
4911 destroy_output_block (ob
);
4913 ipa_prop_write_jump_functions ();
4917 /* Release function summary. */
4920 ipa_free_fn_summary (void)
4922 if (!ipa_call_summaries
)
4924 ggc_delete (ipa_fn_summaries
);
4925 ipa_fn_summaries
= NULL
;
4926 delete ipa_call_summaries
;
4927 ipa_call_summaries
= NULL
;
4928 edge_predicate_pool
.release ();
4929 /* During IPA this is one of largest datastructures to release. */
4934 /* Release function summary. */
4937 ipa_free_size_summary (void)
4939 if (!ipa_size_summaries
)
4941 delete ipa_size_summaries
;
4942 ipa_size_summaries
= NULL
;
4947 const pass_data pass_data_local_fn_summary
=
4949 GIMPLE_PASS
, /* type */
4950 "local-fnsummary", /* name */
4951 OPTGROUP_INLINE
, /* optinfo_flags */
4952 TV_INLINE_PARAMETERS
, /* tv_id */
4953 0, /* properties_required */
4954 0, /* properties_provided */
4955 0, /* properties_destroyed */
4956 0, /* todo_flags_start */
4957 0, /* todo_flags_finish */
4960 class pass_local_fn_summary
: public gimple_opt_pass
4963 pass_local_fn_summary (gcc::context
*ctxt
)
4964 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4967 /* opt_pass methods: */
4968 opt_pass
* clone () final override
4970 return new pass_local_fn_summary (m_ctxt
);
4972 unsigned int execute (function
*) final override
4974 return compute_fn_summary_for_current ();
4977 }; // class pass_local_fn_summary
4982 make_pass_local_fn_summary (gcc::context
*ctxt
)
4984 return new pass_local_fn_summary (ctxt
);
4988 /* Free inline summary. */
4992 const pass_data pass_data_ipa_free_fn_summary
=
4994 SIMPLE_IPA_PASS
, /* type */
4995 "free-fnsummary", /* name */
4996 OPTGROUP_NONE
, /* optinfo_flags */
4997 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4998 0, /* properties_required */
4999 0, /* properties_provided */
5000 0, /* properties_destroyed */
5001 0, /* todo_flags_start */
5002 0, /* todo_flags_finish */
5005 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
5008 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
5009 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
5013 /* opt_pass methods: */
5014 opt_pass
*clone () final override
5016 return new pass_ipa_free_fn_summary (m_ctxt
);
5018 void set_pass_param (unsigned int n
, bool param
) final override
5020 gcc_assert (n
== 0);
5023 bool gate (function
*) final override
{ return true; }
5024 unsigned int execute (function
*) final override
5026 ipa_free_fn_summary ();
5027 /* Free ipa-prop structures if they are no longer needed. */
5028 ipa_free_all_structures_after_iinln ();
5030 ipa_free_size_summary ();
5036 }; // class pass_ipa_free_fn_summary
5040 simple_ipa_opt_pass
*
5041 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
5043 return new pass_ipa_free_fn_summary (ctxt
);
5048 const pass_data pass_data_ipa_fn_summary
=
5050 IPA_PASS
, /* type */
5051 "fnsummary", /* name */
5052 OPTGROUP_INLINE
, /* optinfo_flags */
5053 TV_IPA_FNSUMMARY
, /* tv_id */
5054 0, /* properties_required */
5055 0, /* properties_provided */
5056 0, /* properties_destroyed */
5057 0, /* todo_flags_start */
5058 ( TODO_dump_symtab
), /* todo_flags_finish */
5061 class pass_ipa_fn_summary
: public ipa_opt_pass_d
5064 pass_ipa_fn_summary (gcc::context
*ctxt
)
5065 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
5066 ipa_fn_summary_generate
, /* generate_summary */
5067 ipa_fn_summary_write
, /* write_summary */
5068 ipa_fn_summary_read
, /* read_summary */
5069 NULL
, /* write_optimization_summary */
5070 NULL
, /* read_optimization_summary */
5071 NULL
, /* stmt_fixup */
5072 0, /* function_transform_todo_flags_start */
5073 NULL
, /* function_transform */
5074 NULL
) /* variable_transform */
5077 /* opt_pass methods: */
5078 unsigned int execute (function
*) final override
{ return 0; }
5080 }; // class pass_ipa_fn_summary
5085 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
5087 return new pass_ipa_fn_summary (ctxt
);
5090 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
5091 within the same process. For use by toplev::finalize. */
5094 ipa_fnsummary_cc_finalize (void)
5096 ipa_free_fn_summary ();