1 /* Function summary pass.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
55 #define INCLUDE_VECTOR
57 #include "coretypes.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
65 #include "tree-streamer.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-iterator.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
79 #include "ipa-fnsummary.h"
81 #include "tree-scalar-evolution.h"
82 #include "ipa-utils.h"
83 #include "cfgexpand.h"
85 #include "stringpool.h"
87 #include "tree-into-ssa.h"
88 #include "symtab-clones.h"
89 #include "gimple-range.h"
93 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
94 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
95 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
97 /* Edge predicates goes here. */
98 static object_allocator
<ipa_predicate
> edge_predicate_pool ("edge predicates");
101 /* Dump IPA hints. */
103 ipa_dump_hints (FILE *f
, ipa_hints hints
)
107 fprintf (f
, "IPA hints:");
108 if (hints
& INLINE_HINT_indirect_call
)
110 hints
&= ~INLINE_HINT_indirect_call
;
111 fprintf (f
, " indirect_call");
113 if (hints
& INLINE_HINT_loop_iterations
)
115 hints
&= ~INLINE_HINT_loop_iterations
;
116 fprintf (f
, " loop_iterations");
118 if (hints
& INLINE_HINT_loop_stride
)
120 hints
&= ~INLINE_HINT_loop_stride
;
121 fprintf (f
, " loop_stride");
123 if (hints
& INLINE_HINT_same_scc
)
125 hints
&= ~INLINE_HINT_same_scc
;
126 fprintf (f
, " same_scc");
128 if (hints
& INLINE_HINT_in_scc
)
130 hints
&= ~INLINE_HINT_in_scc
;
131 fprintf (f
, " in_scc");
133 if (hints
& INLINE_HINT_cross_module
)
135 hints
&= ~INLINE_HINT_cross_module
;
136 fprintf (f
, " cross_module");
138 if (hints
& INLINE_HINT_declared_inline
)
140 hints
&= ~INLINE_HINT_declared_inline
;
141 fprintf (f
, " declared_inline");
143 if (hints
& INLINE_HINT_known_hot
)
145 hints
&= ~INLINE_HINT_known_hot
;
146 fprintf (f
, " known_hot");
148 if (hints
& INLINE_HINT_builtin_constant_p
)
150 hints
&= ~INLINE_HINT_builtin_constant_p
;
151 fprintf (f
, " builtin_constant_p");
157 /* Record SIZE and TIME to SUMMARY.
158 The accounted code will be executed when EXEC_PRED is true.
159 When NONCONST_PRED is false the code will evaluate to constant and
160 will get optimized out in specialized clones of the function.
161 If CALL is true account to call_size_time_table rather than
165 ipa_fn_summary::account_size_time (int size
, sreal time
,
166 const ipa_predicate
&exec_pred
,
167 const ipa_predicate
&nonconst_pred_in
,
173 ipa_predicate nonconst_pred
;
174 vec
<size_time_entry
> *table
= call
? &call_size_time_table
: &size_time_table
;
176 if (exec_pred
== false)
179 nonconst_pred
= nonconst_pred_in
& exec_pred
;
181 if (nonconst_pred
== false)
184 /* We need to create initial empty unconditional clause, but otherwise
185 we don't need to account empty times and sizes. */
186 if (!size
&& time
== 0 && table
->length ())
189 /* Only for calls we are unaccounting what we previously recorded. */
190 gcc_checking_assert (time
>= 0 || call
);
192 for (i
= 0; table
->iterate (i
, &e
); i
++)
193 if (e
->exec_predicate
== exec_pred
194 && e
->nonconst_predicate
== nonconst_pred
)
199 if (i
== max_size_time_table_size
)
204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
206 "\t\tReached limit on number of entries, "
207 "ignoring the predicate.");
209 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
212 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
213 ((double) size
) / ipa_fn_summary::size_scale
,
214 (time
.to_double ()), found
? "" : "new ");
215 exec_pred
.dump (dump_file
, conds
, 0);
216 if (exec_pred
!= nonconst_pred
)
218 fprintf (dump_file
, " nonconst:");
219 nonconst_pred
.dump (dump_file
, conds
);
222 fprintf (dump_file
, "\n");
226 class size_time_entry new_entry
;
227 new_entry
.size
= size
;
228 new_entry
.time
= time
;
229 new_entry
.exec_predicate
= exec_pred
;
230 new_entry
.nonconst_predicate
= nonconst_pred
;
232 call_size_time_table
.safe_push (new_entry
);
234 size_time_table
.safe_push (new_entry
);
240 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
241 /* Tolerate small roundoff issues. */
247 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
249 static struct cgraph_edge
*
250 redirect_to_unreachable (struct cgraph_edge
*e
)
252 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
253 struct cgraph_node
*target
254 = cgraph_node::get_create (builtin_decl_unreachable ());
257 e
= cgraph_edge::resolve_speculation (e
, target
->decl
);
259 e
= cgraph_edge::make_direct (e
, target
);
261 e
->redirect_callee (target
);
262 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
263 e
->inline_failed
= CIF_UNREACHABLE
;
264 e
->count
= profile_count::zero ();
265 es
->call_stmt_size
= 0;
266 es
->call_stmt_time
= 0;
268 callee
->remove_symbol_and_inline_clones ();
272 /* Set predicate for edge E. */
275 edge_set_predicate (struct cgraph_edge
*e
, ipa_predicate
*predicate
)
277 /* If the edge is determined to be never executed, redirect it
278 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
280 if (predicate
&& *predicate
== false
281 /* When handling speculative edges, we need to do the redirection
282 just once. Do it always on the direct edge, so we do not
283 attempt to resolve speculation while duplicating the edge. */
284 && (!e
->speculative
|| e
->callee
))
285 e
= redirect_to_unreachable (e
);
287 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
288 if (predicate
&& *predicate
!= true)
291 es
->predicate
= edge_predicate_pool
.allocate ();
292 *es
->predicate
= *predicate
;
297 edge_predicate_pool
.remove (es
->predicate
);
298 es
->predicate
= NULL
;
302 /* Set predicate for hint *P. */
305 set_hint_predicate (ipa_predicate
**p
, ipa_predicate new_predicate
)
307 if (new_predicate
== false || new_predicate
== true)
310 edge_predicate_pool
.remove (*p
);
316 *p
= edge_predicate_pool
.allocate ();
321 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
322 Otherwise add a new item to the vector with this predicate and frerq equal
323 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
324 in which case the function does nothing. */
327 add_freqcounting_predicate (vec
<ipa_freqcounting_predicate
, va_gc
> **v
,
328 const ipa_predicate
&new_predicate
, sreal add_freq
,
329 unsigned max_num_predicates
)
331 if (new_predicate
== false || new_predicate
== true)
333 ipa_freqcounting_predicate
*f
;
334 for (int i
= 0; vec_safe_iterate (*v
, i
, &f
); i
++)
335 if (new_predicate
== f
->predicate
)
340 if (vec_safe_length (*v
) >= max_num_predicates
)
341 /* Too many different predicates to account for. */
344 ipa_freqcounting_predicate fcp
;
345 fcp
.predicate
= NULL
;
346 set_hint_predicate (&fcp
.predicate
, new_predicate
);
348 vec_safe_push (*v
, fcp
);
352 /* Compute what conditions may or may not hold given information about
353 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
354 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
355 copy when called in a given context. It is a bitmask of conditions. Bit
356 0 means that condition is known to be false, while bit 1 means that condition
357 may or may not be true. These differs - for example NOT_INLINED condition
358 is always false in the second and also builtin_constant_p tests cannot use
359 the fact that parameter is indeed a constant.
361 When INLINE_P is true, assume that we are inlining. AVAL contains known
362 information about argument values. The function does not modify its content
363 and so AVALs could also be of type ipa_call_arg_values but so far all
364 callers work with the auto version and so we avoid the conversion for
367 ERROR_MARK value of an argument means compile time invariant. */
370 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
372 ipa_auto_call_arg_values
*avals
,
373 clause_t
*ret_clause
,
374 clause_t
*ret_nonspec_clause
,
375 ipa_call_summary
*es
)
377 clause_t clause
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
378 clause_t nonspec_clause
= 1 << ipa_predicate::not_inlined_condition
;
379 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
383 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
388 struct expr_eval_op
*op
;
390 if (c
->code
== ipa_predicate::not_sra_candidate
)
394 || (int)es
->param
.length () <= c
->operand_num
395 || !es
->param
[c
->operand_num
].points_to_possible_sra_candidate
)
396 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
397 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
403 if (c
->code
== ipa_predicate::changed
405 && (avals
->safe_sval_at(c
->operand_num
) == error_mark_node
))
408 if (tree sval
= avals
->safe_sval_at (c
->operand_num
))
409 val
= ipa_find_agg_cst_from_init (sval
, c
->offset
, c
->by_ref
);
412 ipa_argagg_value_list
avs (avals
);
413 val
= avs
.get_value (c
->operand_num
, c
->offset
/ BITS_PER_UNIT
,
419 val
= avals
->safe_sval_at (c
->operand_num
);
420 if (val
&& val
== error_mark_node
421 && c
->code
!= ipa_predicate::changed
)
426 && (c
->code
== ipa_predicate::changed
427 || c
->code
== ipa_predicate::is_not_constant
))
429 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
430 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
433 if (c
->code
== ipa_predicate::changed
)
435 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
439 if (c
->code
== ipa_predicate::is_not_constant
)
441 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
445 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
447 if (c
->type
!= TREE_TYPE (val
))
448 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
449 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
454 val
= fold_unary (op
->code
, op
->type
, val
);
455 else if (!op
->val
[1])
456 val
= fold_binary (op
->code
, op
->type
,
457 op
->index
? op
->val
[0] : val
,
458 op
->index
? val
: op
->val
[0]);
459 else if (op
->index
== 0)
460 val
= fold_ternary (op
->code
, op
->type
,
461 val
, op
->val
[0], op
->val
[1]);
462 else if (op
->index
== 1)
463 val
= fold_ternary (op
->code
, op
->type
,
464 op
->val
[0], val
, op
->val
[1]);
465 else if (op
->index
== 2)
466 val
= fold_ternary (op
->code
, op
->type
,
467 op
->val
[0], op
->val
[1], val
);
473 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
476 if (res
&& integer_zerop (res
))
478 if (res
&& integer_onep (res
))
480 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
482 |= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
486 if (c
->operand_num
< (int) avals
->m_known_value_ranges
.length ()
488 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
490 Value_Range
vr (avals
->m_known_value_ranges
[c
->operand_num
]);
491 if (!vr
.undefined_p ()
493 && (TYPE_SIZE (c
->type
) == TYPE_SIZE (vr
.type ())))
495 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
496 range_cast (vr
, c
->type
);
498 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
500 if (vr
.varying_p () || vr
.undefined_p ())
503 Value_Range
res (op
->type
);
506 Value_Range
varying (op
->type
);
507 varying
.set_varying (op
->type
);
508 range_op_handler
handler (op
->code
);
510 || !res
.supports_type_p (op
->type
)
511 || !handler
.fold_range (res
, op
->type
, vr
, varying
))
512 res
.set_varying (op
->type
);
514 else if (!op
->val
[1])
516 Value_Range
op0 (op
->type
);
517 range_op_handler
handler (op
->code
);
519 ipa_range_set_and_normalize (op0
, op
->val
[0]);
522 || !res
.supports_type_p (op
->type
)
523 || !handler
.fold_range (res
, op
->type
,
524 op
->index
? op0
: vr
,
525 op
->index
? vr
: op0
))
526 res
.set_varying (op
->type
);
529 res
.set_varying (op
->type
);
532 if (!vr
.varying_p () && !vr
.undefined_p ())
535 Value_Range
val_vr (TREE_TYPE (c
->val
));
536 range_op_handler
handler (c
->code
);
538 ipa_range_set_and_normalize (val_vr
, c
->val
);
541 || !val_vr
.supports_type_p (TREE_TYPE (c
->val
))
542 || !handler
.fold_range (res
, boolean_type_node
, vr
, val_vr
))
543 res
.set_varying (boolean_type_node
);
551 clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
552 nonspec_clause
|= 1 << (i
+ ipa_predicate::first_dynamic_condition
);
554 *ret_clause
= clause
;
555 if (ret_nonspec_clause
)
556 *ret_nonspec_clause
= nonspec_clause
;
559 /* Return true if VRP will be exectued on the function.
560 We do not want to anticipate optimizations that will not happen.
562 FIXME: This can be confused with -fdisable and debug counters and thus
563 it should not be used for correctness (only to make heuristics work).
564 This means that inliner should do its own optimizations of expressions
565 that it predicts to be constant so wrong code can not be triggered by
566 builtin_constant_p. */
569 vrp_will_run_p (struct cgraph_node
*node
)
571 return (opt_for_fn (node
->decl
, optimize
)
572 && !opt_for_fn (node
->decl
, optimize_debug
)
573 && opt_for_fn (node
->decl
, flag_tree_vrp
));
576 /* Similarly about FRE. */
579 fre_will_run_p (struct cgraph_node
*node
)
581 return (opt_for_fn (node
->decl
, optimize
)
582 && !opt_for_fn (node
->decl
, optimize_debug
)
583 && opt_for_fn (node
->decl
, flag_tree_fre
));
586 /* Work out what conditions might be true at invocation of E.
587 Compute costs for inlined edge if INLINE_P is true.
589 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
590 (if non-NULL) conditions evaluated for nonspecialized clone called
593 Vectors in AVALS will be populated with useful known information about
594 argument values - information not known to have any uses will be omitted -
595 except for m_known_contexts which will only be calculated if
596 COMPUTE_CONTEXTS is true. */
599 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
600 clause_t
*clause_ptr
,
601 clause_t
*nonspec_clause_ptr
,
602 ipa_auto_call_arg_values
*avals
,
603 bool compute_contexts
)
605 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
606 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
607 class ipa_edge_args
*args
;
608 class ipa_call_summary
*es
= NULL
;
611 *clause_ptr
= inline_p
? 0 : 1 << ipa_predicate::not_inlined_condition
;
613 if (ipa_node_params_sum
614 && !e
->call_stmt_cannot_inline_p
615 && (info
->conds
|| compute_contexts
)
616 && (args
= ipa_edge_args_sum
->get (e
)) != NULL
)
618 struct cgraph_node
*caller
;
619 class ipa_node_params
*caller_parms_info
, *callee_pi
= NULL
;
620 int i
, count
= ipa_get_cs_argument_count (args
);
621 es
= ipa_call_summaries
->get (e
);
625 if (e
->caller
->inlined_to
)
626 caller
= e
->caller
->inlined_to
;
629 caller_parms_info
= ipa_node_params_sum
->get (caller
);
630 callee_pi
= ipa_node_params_sum
->get (callee
);
632 /* Watch for thunks. */
634 /* Watch for variadic functions. */
635 count
= MIN (count
, ipa_get_param_count (callee_pi
));
639 for (i
= 0; i
< count
; i
++)
641 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
643 if (ipa_is_param_used_by_indirect_call (callee_pi
, i
)
644 || ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
646 /* Determine if we know constant value of the parameter. */
647 tree type
= ipa_get_type (callee_pi
, i
);
648 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
, type
);
650 if (!cst
&& e
->call_stmt
651 && i
< (int)gimple_call_num_args (e
->call_stmt
))
653 cst
= gimple_call_arg (e
->call_stmt
, i
);
654 if (!is_gimple_min_invariant (cst
))
659 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
660 if (!avals
->m_known_vals
.length ())
661 avals
->m_known_vals
.safe_grow_cleared (count
, true);
662 avals
->m_known_vals
[i
] = cst
;
664 else if (inline_p
&& !es
->param
[i
].change_prob
)
666 if (!avals
->m_known_vals
.length ())
667 avals
->m_known_vals
.safe_grow_cleared (count
, true);
668 avals
->m_known_vals
[i
] = error_mark_node
;
671 /* If we failed to get simple constant, try value range. */
672 if ((!cst
|| TREE_CODE (cst
) != INTEGER_CST
)
673 && vrp_will_run_p (caller
)
674 && ipa_is_param_used_by_ipa_predicates (callee_pi
, i
))
676 Value_Range
vr (type
);
678 ipa_value_range_from_jfunc (vr
, caller_parms_info
, e
, jf
, type
);
679 if (!vr
.undefined_p () && !vr
.varying_p ())
681 if (!avals
->m_known_value_ranges
.length ())
682 avals
->m_known_value_ranges
.safe_grow_cleared (count
,
684 avals
->m_known_value_ranges
[i
] = vr
;
688 /* Determine known aggregate values. */
689 if (fre_will_run_p (caller
))
690 ipa_push_agg_values_from_jfunc (caller_parms_info
,
692 &avals
->m_known_aggs
);
695 /* For calls used in polymorphic calls we further determine
696 polymorphic call context. */
698 && ipa_is_param_used_by_polymorphic_call (callee_pi
, i
))
700 ipa_polymorphic_call_context
701 ctx
= ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
702 if (!ctx
.useless_p ())
704 if (!avals
->m_known_contexts
.length ())
705 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
706 avals
->m_known_contexts
[i
]
707 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
712 gcc_assert (!count
|| callee
->thunk
);
714 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
&& info
->conds
)
716 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
718 for (i
= 0; i
< count
; i
++)
720 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
721 if (!is_gimple_min_invariant (cst
))
725 if (!avals
->m_known_vals
.length ())
726 avals
->m_known_vals
.safe_grow_cleared (count
, true);
727 avals
->m_known_vals
[i
] = cst
;
732 evaluate_conditions_for_known_args (callee
, inline_p
, avals
, clause_ptr
,
733 nonspec_clause_ptr
, es
);
737 /* Allocate the function summary. */
740 ipa_fn_summary_alloc (void)
742 gcc_checking_assert (!ipa_fn_summaries
);
743 ipa_size_summaries
= new ipa_size_summary_t (symtab
);
744 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
745 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
748 ipa_call_summary::~ipa_call_summary ()
751 edge_predicate_pool
.remove (predicate
);
756 ipa_fn_summary::~ipa_fn_summary ()
758 unsigned len
= vec_safe_length (loop_iterations
);
759 for (unsigned i
= 0; i
< len
; i
++)
760 edge_predicate_pool
.remove ((*loop_iterations
)[i
].predicate
);
761 len
= vec_safe_length (loop_strides
);
762 for (unsigned i
= 0; i
< len
; i
++)
763 edge_predicate_pool
.remove ((*loop_strides
)[i
].predicate
);
765 call_size_time_table
.release ();
766 vec_free (loop_iterations
);
767 vec_free (loop_strides
);
768 builtin_constant_p_parms
.release ();
772 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
775 for (e
= node
->callees
; e
; e
= e
->next_callee
)
776 ipa_call_summaries
->remove (e
);
777 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
778 ipa_call_summaries
->remove (e
);
781 /* Duplicate predicates in loop hint vector, allocating memory for them and
782 remove and deallocate any uninteresting (true or false) ones. Return the
785 static vec
<ipa_freqcounting_predicate
, va_gc
> *
786 remap_freqcounting_preds_after_dup (vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
787 clause_t possible_truths
)
789 if (vec_safe_length (v
) == 0)
792 vec
<ipa_freqcounting_predicate
, va_gc
> *res
= v
->copy ();
793 int len
= res
->length();
794 for (int i
= len
- 1; i
>= 0; i
--)
796 ipa_predicate new_predicate
797 = (*res
)[i
].predicate
->remap_after_duplication (possible_truths
);
798 /* We do not want to free previous predicate; it is used by node
800 (*res
)[i
].predicate
= NULL
;
801 set_hint_predicate (&(*res
)[i
].predicate
, new_predicate
);
803 if (!(*res
)[i
].predicate
)
804 res
->unordered_remove (i
);
811 /* Hook that is called by cgraph.cc when a node is duplicated. */
813 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
815 ipa_fn_summary
*src_info
,
816 ipa_fn_summary
*info
)
818 new (info
) ipa_fn_summary (*src_info
);
819 /* TODO: as an optimization, we may avoid copying conditions
820 that are known to be false or true. */
821 info
->conds
= vec_safe_copy (info
->conds
);
823 clone_info
*cinfo
= clone_info::get (dst
);
824 /* When there are any replacements in the function body, see if we can figure
825 out that something was optimized out. */
826 if (ipa_node_params_sum
&& cinfo
&& cinfo
->tree_map
)
828 /* Use SRC parm info since it may not be copied yet. */
829 ipa_node_params
*parms_info
= ipa_node_params_sum
->get (src
);
830 ipa_auto_call_arg_values avals
;
831 int count
= ipa_get_param_count (parms_info
);
833 clause_t possible_truths
;
834 ipa_predicate true_pred
= true;
836 int optimized_out_size
= 0;
837 bool inlined_to_p
= false;
838 struct cgraph_edge
*edge
, *next
;
840 info
->size_time_table
.release ();
841 avals
.m_known_vals
.safe_grow_cleared (count
, true);
842 for (i
= 0; i
< count
; i
++)
844 struct ipa_replace_map
*r
;
846 for (j
= 0; vec_safe_iterate (cinfo
->tree_map
, j
, &r
); j
++)
848 if (r
->parm_num
== i
)
850 avals
.m_known_vals
[i
] = r
->new_tree
;
855 evaluate_conditions_for_known_args (dst
, false,
858 /* We are going to specialize,
859 so ignore nonspec truths. */
863 info
->account_size_time (0, 0, true_pred
, true_pred
);
865 /* Remap size_time vectors.
866 Simplify the predicate by pruning out alternatives that are known
868 TODO: as on optimization, we can also eliminate conditions known
870 for (i
= 0; src_info
->size_time_table
.iterate (i
, &e
); i
++)
872 ipa_predicate new_exec_pred
;
873 ipa_predicate new_nonconst_pred
;
874 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
876 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
878 if (new_exec_pred
== false || new_nonconst_pred
== false)
879 optimized_out_size
+= e
->size
;
881 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
885 /* Remap edge predicates with the same simplification as above.
886 Also copy constantness arrays. */
887 for (edge
= dst
->callees
; edge
; edge
= next
)
889 ipa_predicate new_predicate
;
890 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
891 next
= edge
->next_callee
;
893 if (!edge
->inline_failed
)
897 new_predicate
= es
->predicate
->remap_after_duplication
899 if (new_predicate
== false && *es
->predicate
!= false)
900 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
901 edge_set_predicate (edge
, &new_predicate
);
904 /* Remap indirect edge predicates with the same simplification as above.
905 Also copy constantness arrays. */
906 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
908 ipa_predicate new_predicate
;
909 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
910 next
= edge
->next_callee
;
912 gcc_checking_assert (edge
->inline_failed
);
915 new_predicate
= es
->predicate
->remap_after_duplication
917 if (new_predicate
== false && *es
->predicate
!= false)
919 += es
->call_stmt_size
* ipa_fn_summary::size_scale
;
920 edge_set_predicate (edge
, &new_predicate
);
922 info
->loop_iterations
923 = remap_freqcounting_preds_after_dup (info
->loop_iterations
,
926 = remap_freqcounting_preds_after_dup (info
->loop_strides
,
928 if (info
->builtin_constant_p_parms
.length())
930 vec
<int, va_heap
, vl_ptr
> parms
= info
->builtin_constant_p_parms
;
932 info
->builtin_constant_p_parms
= vNULL
;
933 for (i
= 0; parms
.iterate (i
, &ip
); i
++)
934 if (!avals
.m_known_vals
[ip
])
935 info
->builtin_constant_p_parms
.safe_push (ip
);
938 /* If inliner or someone after inliner will ever start producing
939 non-trivial clones, we will get trouble with lack of information
940 about updating self sizes, because size vectors already contains
941 sizes of the callees. */
942 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
946 info
->size_time_table
= src_info
->size_time_table
.copy ();
947 info
->loop_iterations
= vec_safe_copy (src_info
->loop_iterations
);
948 info
->loop_strides
= vec_safe_copy (info
->loop_strides
);
950 info
->builtin_constant_p_parms
951 = info
->builtin_constant_p_parms
.copy ();
953 ipa_freqcounting_predicate
*f
;
954 for (int i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &f
); i
++)
956 ipa_predicate p
= *f
->predicate
;
958 set_hint_predicate (&f
->predicate
, p
);
960 for (int i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &f
); i
++)
962 ipa_predicate p
= *f
->predicate
;
964 set_hint_predicate (&f
->predicate
, p
);
967 if (!dst
->inlined_to
)
968 ipa_update_overall_fn_summary (dst
);
972 /* Hook that is called by cgraph.cc when a node is duplicated. */
975 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
976 struct cgraph_edge
*dst
,
977 class ipa_call_summary
*srcinfo
,
978 class ipa_call_summary
*info
)
980 new (info
) ipa_call_summary (*srcinfo
);
981 info
->predicate
= NULL
;
982 edge_set_predicate (dst
, srcinfo
->predicate
);
983 info
->param
= srcinfo
->param
.copy ();
984 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
986 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
987 - eni_size_weights
.call_cost
);
988 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
989 - eni_time_weights
.call_cost
);
993 /* Dump edge summaries associated to NODE and recursively to all clones.
997 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
998 class ipa_fn_summary
*info
)
1000 struct cgraph_edge
*edge
;
1001 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
1003 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1004 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
1008 "%*s%s %s\n%*s freq:%4.2f",
1009 indent
, "", callee
->dump_name (),
1010 !edge
->inline_failed
1011 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
1012 indent
, "", edge
->sreal_frequency ().to_double ());
1014 if (cross_module_call_p (edge
))
1015 fprintf (f
, " cross module");
1018 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
1019 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
1021 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
1022 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1024 fprintf (f
, " callee size:%2i stack:%2i",
1025 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
1026 (int) s
->estimated_stack_size
);
1028 if (es
&& es
->predicate
)
1030 fprintf (f
, " predicate: ");
1031 es
->predicate
->dump (f
, info
->conds
);
1035 if (es
&& es
->param
.exists ())
1036 for (i
= 0; i
< (int) es
->param
.length (); i
++)
1038 int prob
= es
->param
[i
].change_prob
;
1041 fprintf (f
, "%*s op%i is compile time invariant\n",
1043 else if (prob
!= REG_BR_PROB_BASE
)
1044 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
1045 prob
* 100.0 / REG_BR_PROB_BASE
);
1046 if (es
->param
[i
].points_to_local_or_readonly_memory
)
1047 fprintf (f
, "%*s op%i points to local or readonly memory\n",
1049 if (es
->param
[i
].points_to_possible_sra_candidate
)
1050 fprintf (f
, "%*s op%i points to possible sra candidate\n",
1053 if (!edge
->inline_failed
)
1055 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
1056 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
1058 (int) ipa_get_stack_frame_offset (callee
),
1059 (int) ss
->estimated_self_stack_size
);
1060 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
1063 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
1065 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
1066 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1070 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
1071 es
->call_stmt_time
);
1074 fprintf (f
, "predicate: ");
1075 es
->predicate
->dump (f
, info
->conds
);
1084 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
1086 if (node
->definition
)
1088 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
1089 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
1094 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
1095 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
1096 fprintf (f
, " always_inline");
1098 fprintf (f
, " inlinable");
1099 if (s
->fp_expressions
)
1100 fprintf (f
, " fp_expression");
1101 if (s
->builtin_constant_p_parms
.length ())
1103 fprintf (f
, " builtin_constant_p_parms");
1104 for (unsigned int i
= 0;
1105 i
< s
->builtin_constant_p_parms
.length (); i
++)
1106 fprintf (f
, " %i", s
->builtin_constant_p_parms
[i
]);
1108 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
1109 fprintf (f
, " self size: %i\n", ss
->self_size
);
1110 fprintf (f
, " global size: %i\n", ss
->size
);
1111 fprintf (f
, " min size: %i\n", s
->min_size
);
1112 fprintf (f
, " self stack: %i\n",
1113 (int) ss
->estimated_self_stack_size
);
1114 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
1116 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
1118 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
1119 for (i
= 0; s
->size_time_table
.iterate (i
, &e
); i
++)
1121 fprintf (f
, " size:%f, time:%f",
1122 (double) e
->size
/ ipa_fn_summary::size_scale
,
1123 e
->time
.to_double ());
1124 if (e
->exec_predicate
!= true)
1126 fprintf (f
, ", executed if:");
1127 e
->exec_predicate
.dump (f
, s
->conds
, 0);
1129 if (e
->exec_predicate
!= e
->nonconst_predicate
)
1131 fprintf (f
, ", nonconst if:");
1132 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
1136 ipa_freqcounting_predicate
*fcp
;
1137 bool first_fcp
= true;
1138 for (int i
= 0; vec_safe_iterate (s
->loop_iterations
, i
, &fcp
); i
++)
1142 fprintf (f
, " loop iterations:");
1145 fprintf (f
, " %3.2f for ", fcp
->freq
.to_double ());
1146 fcp
->predicate
->dump (f
, s
->conds
);
1149 for (int i
= 0; vec_safe_iterate (s
->loop_strides
, i
, &fcp
); i
++)
1153 fprintf (f
, " loop strides:");
1156 fprintf (f
, " %3.2f for :", fcp
->freq
.to_double ());
1157 fcp
->predicate
->dump (f
, s
->conds
);
1159 fprintf (f
, " calls:\n");
1160 dump_ipa_call_summary (f
, 4, node
, s
);
1163 fprintf (f
, " target_info: %x\n", s
->target_info
);
1166 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
1171 ipa_debug_fn_summary (struct cgraph_node
*node
)
1173 ipa_dump_fn_summary (stderr
, node
);
1177 ipa_dump_fn_summaries (FILE *f
)
1179 struct cgraph_node
*node
;
1181 FOR_EACH_DEFINED_FUNCTION (node
)
1182 if (!node
->inlined_to
)
1183 ipa_dump_fn_summary (f
, node
);
1186 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1187 boolean variable pointed to by DATA. */
1190 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1193 bool *b
= (bool *) data
;
1198 /* If OP refers to value of function parameter, return the corresponding
1199 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1200 PARM_DECL) will be stored to *SIZE_P in that case too. */
1203 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1206 /* SSA_NAME referring to parm default def? */
1207 if (TREE_CODE (op
) == SSA_NAME
1208 && SSA_NAME_IS_DEFAULT_DEF (op
)
1209 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1212 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1213 return SSA_NAME_VAR (op
);
1215 /* Non-SSA parm reference? */
1216 if (TREE_CODE (op
) == PARM_DECL
1217 && fbi
->aa_walk_budget
> 0)
1219 bool modified
= false;
1222 ao_ref_init (&refd
, op
);
1223 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1224 mark_modified
, &modified
, NULL
, NULL
,
1225 fbi
->aa_walk_budget
);
1228 fbi
->aa_walk_budget
= 0;
1231 fbi
->aa_walk_budget
-= walked
;
1235 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1242 /* If OP refers to value of function parameter, return the corresponding
1243 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1244 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1245 stored to *SIZE_P in that case too. */
1248 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1251 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1255 if (TREE_CODE (op
) == SSA_NAME
1256 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1257 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1258 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1259 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1264 /* If OP refers to a value of a function parameter or value loaded from an
1265 aggregate passed to a parameter (either by value or reference), return TRUE
1266 and store the number of the parameter to *INDEX_P, the access size into
1267 *SIZE_P, and information whether and how it has been loaded from an
1268 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1269 statement in which OP is used or loaded. */
1272 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1273 gimple
*stmt
, tree op
, int *index_p
,
1275 struct agg_position_info
*aggpos
)
1277 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1279 gcc_checking_assert (aggpos
);
1282 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1285 aggpos
->agg_contents
= false;
1286 aggpos
->by_ref
= false;
1290 if (TREE_CODE (op
) == SSA_NAME
)
1292 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1293 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1295 stmt
= SSA_NAME_DEF_STMT (op
);
1296 op
= gimple_assign_rhs1 (stmt
);
1297 if (!REFERENCE_CLASS_P (op
))
1298 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1302 aggpos
->agg_contents
= true;
1303 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1304 stmt
, op
, index_p
, &aggpos
->offset
,
1305 size_p
, &aggpos
->by_ref
);
1308 /* If stmt is simple load or store of value pointed to by a function parmaeter,
1309 return its index. */
1312 load_or_store_of_ptr_parameter (ipa_func_body_info
*fbi
, gimple
*stmt
)
1316 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
1320 if (gimple_assign_load_p (stmt
))
1321 param
= gimple_assign_rhs1 (stmt
);
1322 else if (gimple_store_p (stmt
))
1323 param
= gimple_assign_lhs (stmt
);
1326 tree base
= get_base_address (param
);
1327 if (TREE_CODE (base
) != MEM_REF
1328 || TREE_CODE (TREE_OPERAND (base
, 0)) != SSA_NAME
1329 || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base
, 0)))
1331 tree p
= SSA_NAME_VAR (TREE_OPERAND (base
, 0));
1332 if (TREE_CODE (p
) != PARM_DECL
)
1334 return ipa_get_param_decl_index (fbi
->info
, p
);
1337 /* See if statement might disappear after inlining.
1338 0 - means not eliminated
1339 1 - half of statements goes away
1340 2 - for sure it is eliminated.
1341 We are not terribly sophisticated, basically looking for simple abstraction
1342 penalty wrappers. */
1345 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1347 enum gimple_code code
= gimple_code (stmt
);
1348 enum tree_code rhs_code
;
1358 if (gimple_num_ops (stmt
) != 2)
1361 rhs_code
= gimple_assign_rhs_code (stmt
);
1363 /* Casts of parameters, loads from parameters passed by reference
1364 and stores to return value or parameters are often free after
1365 inlining due to SRA and further combining.
1366 Assume that half of statements goes away. */
1367 if (CONVERT_EXPR_CODE_P (rhs_code
)
1368 || rhs_code
== VIEW_CONVERT_EXPR
1369 || rhs_code
== ADDR_EXPR
1370 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1372 tree rhs
= gimple_assign_rhs1 (stmt
);
1373 tree lhs
= gimple_assign_lhs (stmt
);
1374 tree inner_rhs
= get_base_address (rhs
);
1375 tree inner_lhs
= get_base_address (lhs
);
1376 bool rhs_free
= false;
1377 bool lhs_free
= false;
1384 /* Reads of parameter are expected to be free. */
1385 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1387 /* Match expressions of form &this->field. Those will most likely
1388 combine with something upstream after inlining. */
1389 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1391 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1392 if (TREE_CODE (op
) == PARM_DECL
)
1394 else if (TREE_CODE (op
) == MEM_REF
1395 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1400 /* When parameter is not SSA register because its address is taken
1401 and it is just copied into one, the statement will be completely
1402 free after inlining (we will copy propagate backward). */
1403 if (rhs_free
&& is_gimple_reg (lhs
))
1406 /* Reads of parameters passed by reference
1407 expected to be free (i.e. optimized out after inlining). */
1408 if (TREE_CODE (inner_rhs
) == MEM_REF
1409 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1412 /* Copying parameter passed by reference into gimple register is
1413 probably also going to copy propagate, but we can't be quite
1415 if (rhs_free
&& is_gimple_reg (lhs
))
1418 /* Writes to parameters, parameters passed by value and return value
1419 (either directly or passed via invisible reference) are free.
1421 TODO: We ought to handle testcase like
1422 struct a {int a,b;};
1430 This translate into:
1445 For that we either need to copy ipa-split logic detecting writes
1447 if (TREE_CODE (inner_lhs
) == PARM_DECL
1448 || TREE_CODE (inner_lhs
) == RESULT_DECL
1449 || (TREE_CODE (inner_lhs
) == MEM_REF
1450 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1452 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1453 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1454 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1456 0))) == RESULT_DECL
))))
1459 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1461 if (lhs_free
&& rhs_free
)
1470 /* Analyze EXPR if it represents a series of simple operations performed on
1471 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1472 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1473 Type of the parameter or load from an aggregate via the parameter is
1474 stored in *TYPE_P. Operations on the parameter are recorded to
1475 PARAM_OPS_P if it is not NULL. */
1478 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1479 gimple
*stmt
, tree expr
,
1480 int *index_p
, tree
*type_p
,
1481 struct agg_position_info
*aggpos
,
1482 expr_eval_ops
*param_ops_p
= NULL
)
1484 int op_limit
= opt_for_fn (fbi
->node
->decl
, param_ipa_max_param_expr_ops
);
1488 *param_ops_p
= NULL
;
1492 expr_eval_op eval_op
;
1494 unsigned cst_count
= 0;
1496 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1499 tree type
= TREE_TYPE (expr
);
1501 if (aggpos
->agg_contents
)
1503 /* Stop if containing bit-field. */
1504 if (TREE_CODE (expr
) == BIT_FIELD_REF
1505 || contains_bitfld_component_ref_p (expr
))
1513 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1515 stmt
= SSA_NAME_DEF_STMT (expr
);
1517 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
1519 int flags
= gimple_call_return_flags (call
);
1520 if (!(flags
& ERF_RETURNS_ARG
))
1522 int arg
= flags
& ERF_RETURN_ARG_MASK
;
1523 if (arg
>= (int)gimple_call_num_args (call
))
1525 expr
= gimple_call_arg (stmt
, arg
);
1529 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1532 switch (gimple_assign_rhs_class (stmt
))
1534 case GIMPLE_SINGLE_RHS
:
1535 expr
= gimple_assign_rhs1 (stmt
);
1538 case GIMPLE_UNARY_RHS
:
1542 case GIMPLE_BINARY_RHS
:
1546 case GIMPLE_TERNARY_RHS
:
1554 /* Stop if expression is too complex. */
1555 if (op_count
++ == op_limit
)
1560 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1561 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1562 eval_op
.val
[0] = NULL_TREE
;
1563 eval_op
.val
[1] = NULL_TREE
;
1567 for (unsigned i
= 0; i
< rhs_count
; i
++)
1569 tree op
= gimple_op (stmt
, i
+ 1);
1571 gcc_assert (op
&& !TYPE_P (op
));
1572 if (is_gimple_ip_invariant (op
))
1574 if (++cst_count
== rhs_count
)
1577 eval_op
.val
[cst_count
- 1] = op
;
1581 /* Found a non-constant operand, and record its index in rhs
1588 /* Found more than one non-constant operands. */
1594 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1597 /* Failed to decompose, free resource and return. */
1600 vec_free (*param_ops_p
);
1605 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1608 add_builtin_constant_p_parm (class ipa_fn_summary
*summary
, int parm
)
1612 /* Avoid duplicates. */
1613 for (unsigned int i
= 0;
1614 summary
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
1617 summary
->builtin_constant_p_parms
.safe_push (parm
);
1620 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1621 predicates to the CFG edges. */
1624 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1625 class ipa_fn_summary
*summary
,
1626 class ipa_node_params
*params_summary
,
1631 struct agg_position_info aggpos
;
1632 enum tree_code code
, inverted_code
;
1637 expr_eval_ops param_ops
;
1639 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb
));
1642 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1644 op
= gimple_cond_lhs (last
);
1646 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1649 code
= gimple_cond_code (last
);
1650 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1652 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1654 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1655 ? code
: inverted_code
);
1656 /* invert_tree_comparison will return ERROR_MARK on FP
1657 comparisons that are not EQ/NE instead of returning proper
1658 unordered one. Be sure it is not confused with NON_CONSTANT.
1660 And if the edge's target is the final block of diamond CFG graph
1661 of this conditional statement, we do not need to compute
1662 predicate for the edge because the final block's predicate must
1663 be at least as that of the first block of the statement. */
1664 if (this_code
!= ERROR_MARK
1665 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1668 = add_condition (summary
, params_summary
, index
,
1669 param_type
, &aggpos
,
1670 this_code
, gimple_cond_rhs (last
), param_ops
);
1671 e
->aux
= edge_predicate_pool
.allocate ();
1672 *(ipa_predicate
*) e
->aux
= p
;
1675 vec_free (param_ops
);
1679 if (TREE_CODE (op
) != SSA_NAME
)
1682 if (builtin_constant_p (op))
1686 Here we can predicate nonconstant_code. We can't
1687 really handle constant_code since we have no predicate
1688 for this and also the constant code is not known to be
1689 optimized away when inliner doesn't see operand is constant.
1690 Other optimizers might think otherwise. */
1691 if (gimple_cond_code (last
) != NE_EXPR
1692 || !integer_zerop (gimple_cond_rhs (last
)))
1694 set_stmt
= SSA_NAME_DEF_STMT (op
);
1695 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1696 || gimple_call_num_args (set_stmt
) != 1)
1698 op2
= gimple_call_arg (set_stmt
, 0);
1699 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1702 add_builtin_constant_p_parm (summary
, index
);
1703 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1705 ipa_predicate p
= add_condition (summary
, params_summary
, index
,
1706 param_type
, &aggpos
,
1707 ipa_predicate::is_not_constant
, NULL_TREE
);
1708 e
->aux
= edge_predicate_pool
.allocate ();
1709 *(ipa_predicate
*) e
->aux
= p
;
1714 /* If BB ends by a switch we can turn into predicates, attach corresponding
1715 predicates to the CFG edges. */
1718 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1719 class ipa_fn_summary
*summary
,
1720 class ipa_node_params
*params_summary
,
1725 struct agg_position_info aggpos
;
1731 expr_eval_ops param_ops
;
1733 gswitch
*last
= safe_dyn_cast
<gswitch
*> (*gsi_last_bb (bb
));
1736 op
= gimple_switch_index (last
);
1737 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1741 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1742 tree type
= TREE_TYPE (op
);
1743 int bound_limit
= opt_for_fn (fbi
->node
->decl
,
1744 param_ipa_max_switch_predicate_bounds
);
1745 int bound_count
= 0;
1746 // This can safely be an integer range, as switches can only hold
1750 get_range_query (cfun
)->range_of_expr (vr
, op
);
1751 if (vr
.undefined_p ())
1752 vr
.set_varying (TREE_TYPE (op
));
1753 tree vr_min
, vr_max
;
1754 // TODO: This entire function could use a rewrite to use the irange
1755 // API, instead of trying to recreate its intersection/union logic.
1756 // Any use of get_legacy_range() is a serious code smell.
1757 value_range_kind vr_type
= get_legacy_range (vr
, vr_min
, vr_max
);
1758 wide_int vr_wmin
= wi::to_wide (vr_min
);
1759 wide_int vr_wmax
= wi::to_wide (vr_max
);
1761 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1763 e
->aux
= edge_predicate_pool
.allocate ();
1764 *(ipa_predicate
*) e
->aux
= false;
1767 e
= gimple_switch_edge (cfun
, last
, 0);
1768 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1769 default case if its target basic block is in convergence point of all
1770 switch cases, which can be determined by checking whether it
1771 post-dominates the switch statement. */
1772 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1773 bound_count
= INT_MAX
;
1775 n
= gimple_switch_num_labels (last
);
1776 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1778 tree cl
= gimple_switch_label (last
, case_idx
);
1779 tree min
= CASE_LOW (cl
);
1780 tree max
= CASE_HIGH (cl
);
1783 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1785 /* The case value might not have same type as switch expression,
1786 extend the value based on the expression type. */
1787 if (TREE_TYPE (min
) != type
)
1788 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1792 else if (TREE_TYPE (max
) != type
)
1793 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1795 /* The case's target basic block is in convergence point of all switch
1796 cases, its predicate should be at least as that of the switch
1798 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1800 else if (min
== max
)
1801 p
= add_condition (summary
, params_summary
, index
, param_type
,
1802 &aggpos
, EQ_EXPR
, min
, param_ops
);
1805 ipa_predicate p1
, p2
;
1806 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1807 &aggpos
, GE_EXPR
, min
, param_ops
);
1808 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1809 &aggpos
, LE_EXPR
, max
, param_ops
);
1812 *(ipa_predicate
*) e
->aux
1813 = p
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1815 /* If there are too many disjoint case ranges, predicate for default
1816 case might become too complicated. So add a limit here. */
1817 if (bound_count
> bound_limit
)
1820 bool new_range
= true;
1822 if (!ranges
.is_empty ())
1824 wide_int curr_wmin
= wi::to_wide (min
);
1825 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1827 /* Merge case ranges if they are continuous. */
1828 if (curr_wmin
== last_wmax
+ 1)
1830 else if (vr_type
== VR_ANTI_RANGE
)
1832 /* If two disjoint case ranges can be connected by anti-range
1833 of switch index, combine them to one range. */
1834 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1835 vr_type
= VR_UNDEFINED
;
1836 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1841 /* Create/extend a case range. And we count endpoints of range set,
1842 this number nearly equals to number of conditions that we will create
1843 for predicate of default case. */
1846 bound_count
+= (min
== max
) ? 1 : 2;
1847 ranges
.safe_push (std::make_pair (min
, max
));
1851 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1852 ranges
.last ().second
= max
;
1856 e
= gimple_switch_edge (cfun
, last
, 0);
1857 if (bound_count
> bound_limit
)
1859 *(ipa_predicate
*) e
->aux
= true;
1860 vec_free (param_ops
);
1864 ipa_predicate p_seg
= true;
1865 ipa_predicate p_all
= false;
1867 if (vr_type
!= VR_RANGE
)
1869 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1870 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1873 /* Construct predicate to represent default range set that is negation of
1874 all case ranges. Case range is classified as containing single/non-single
1875 values. Suppose a piece of case ranges in the following.
1877 [D1...D2] [S1] ... [Sn] [D3...D4]
1879 To represent default case's range sets between two non-single value
1880 case ranges (From D2 to D3), we construct predicate as:
1882 D2 < x < D3 && x != S1 && ... && x != Sn
1884 for (size_t i
= 0; i
< ranges
.length (); i
++)
1886 tree min
= ranges
[i
].first
;
1887 tree max
= ranges
[i
].second
;
1890 p_seg
&= add_condition (summary
, params_summary
, index
,
1891 param_type
, &aggpos
, NE_EXPR
,
1895 /* Do not create sub-predicate for range that is beyond low bound
1897 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1899 p_seg
&= add_condition (summary
, params_summary
, index
,
1900 param_type
, &aggpos
,
1901 LT_EXPR
, min
, param_ops
);
1902 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1905 /* Do not create sub-predicate for range that is beyond up bound
1907 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1913 p_seg
= add_condition (summary
, params_summary
, index
,
1914 param_type
, &aggpos
, GT_EXPR
,
1919 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1920 *(ipa_predicate
*) e
->aux
1921 = p_all
.or_with (summary
->conds
, *(ipa_predicate
*) e
->aux
);
1923 vec_free (param_ops
);
1927 /* For each BB in NODE attach to its AUX pointer predicate under
1928 which it is executable. */
1931 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1932 struct cgraph_node
*node
,
1933 class ipa_fn_summary
*summary
,
1934 class ipa_node_params
*params_summary
)
1936 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1940 FOR_EACH_BB_FN (bb
, my_function
)
1942 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1943 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1946 /* Entry block is always executable. */
1947 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1948 = edge_predicate_pool
.allocate ();
1949 *(ipa_predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1951 /* A simple dataflow propagation of predicates forward in the CFG.
1952 TODO: work in reverse postorder. */
1956 FOR_EACH_BB_FN (bb
, my_function
)
1958 ipa_predicate p
= false;
1961 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1965 ipa_predicate this_bb_predicate
1966 = *(ipa_predicate
*) e
->src
->aux
;
1968 this_bb_predicate
&= (*(ipa_predicate
*) e
->aux
);
1969 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1976 basic_block pdom_bb
;
1981 bb
->aux
= edge_predicate_pool
.allocate ();
1982 *((ipa_predicate
*) bb
->aux
) = p
;
1984 else if (p
!= *(ipa_predicate
*) bb
->aux
)
1986 /* This OR operation is needed to ensure monotonous data flow
1987 in the case we hit the limit on number of clauses and the
1988 and/or operations above give approximate answers. */
1989 p
= p
.or_with (summary
->conds
, *(ipa_predicate
*)bb
->aux
);
1990 if (p
!= *(ipa_predicate
*)bb
->aux
)
1993 *((ipa_predicate
*)bb
->aux
) = p
;
1997 /* For switch/if statement, we can OR-combine predicates of all
1998 its cases/branches to get predicate for basic block in their
1999 convergence point, but sometimes this will generate very
2000 complicated predicate. Actually, we can get simplified
2001 predicate in another way by using the fact that predicate
2002 for a basic block must also hold true for its post dominators.
2003 To be specific, basic block in convergence point of
2004 conditional statement should include predicate of the
2006 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
2007 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
2009 else if (!pdom_bb
->aux
)
2012 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
2013 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
2015 else if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
2017 p
= p
.or_with (summary
->conds
,
2018 *(ipa_predicate
*)pdom_bb
->aux
);
2019 if (p
!= *(ipa_predicate
*)pdom_bb
->aux
)
2022 *((ipa_predicate
*)pdom_bb
->aux
) = p
;
2031 /* Return predicate specifying when the STMT might have result that is not
2032 a compile time constant. */
2034 static ipa_predicate
2035 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
2036 class ipa_fn_summary
*summary
,
2037 class ipa_node_params
*params_summary
,
2039 vec
<ipa_predicate
> nonconstant_names
)
2044 while (UNARY_CLASS_P (expr
))
2045 expr
= TREE_OPERAND (expr
, 0);
2047 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
2048 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2049 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
2050 ipa_predicate::changed
, NULL_TREE
);
2051 if (is_gimple_min_invariant (expr
))
2053 if (TREE_CODE (expr
) == SSA_NAME
)
2054 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
2055 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
2058 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2060 TREE_OPERAND (expr
, 0),
2066 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2068 TREE_OPERAND (expr
, 1),
2070 return p1
.or_with (summary
->conds
, p2
);
2072 else if (TREE_CODE (expr
) == COND_EXPR
)
2075 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2077 TREE_OPERAND (expr
, 0),
2083 = will_be_nonconstant_expr_predicate (fbi
, summary
,
2085 TREE_OPERAND (expr
, 1),
2089 p1
= p1
.or_with (summary
->conds
, p2
);
2090 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2092 TREE_OPERAND (expr
, 2),
2094 return p2
.or_with (summary
->conds
, p1
);
2096 else if (TREE_CODE (expr
) == CALL_EXPR
)
2106 /* Return predicate specifying when the STMT might have result that is not
2107 a compile time constant. */
2109 static ipa_predicate
2110 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
2111 class ipa_fn_summary
*summary
,
2112 class ipa_node_params
*params_summary
,
2114 vec
<ipa_predicate
> nonconstant_names
)
2116 ipa_predicate p
= true;
2119 tree param_type
= NULL_TREE
;
2120 ipa_predicate op_non_const
;
2123 struct agg_position_info aggpos
;
2125 /* What statements might be optimized away
2126 when their arguments are constant. */
2127 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2128 && gimple_code (stmt
) != GIMPLE_COND
2129 && gimple_code (stmt
) != GIMPLE_SWITCH
2130 && (gimple_code (stmt
) != GIMPLE_CALL
2131 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
2134 /* Stores will stay anyway. */
2135 if (gimple_store_p (stmt
))
2138 is_load
= gimple_assign_load_p (stmt
);
2140 /* Loads can be optimized when the value is known. */
2143 tree op
= gimple_assign_rhs1 (stmt
);
2144 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
2151 /* See if we understand all operands before we start
2152 adding conditionals. */
2153 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2155 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2156 /* For arguments we can build a condition. */
2157 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
2159 if (TREE_CODE (use
) != SSA_NAME
)
2161 /* If we know when operand is constant,
2162 we still can say something useful. */
2163 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
2170 add_condition (summary
, params_summary
,
2171 base_index
, param_type
, &aggpos
,
2172 ipa_predicate::changed
, NULL_TREE
);
2174 op_non_const
= false;
2175 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
2177 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
2180 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
2182 if (index
!= base_index
)
2183 p
= add_condition (summary
, params_summary
, index
,
2184 TREE_TYPE (parm
), NULL
,
2185 ipa_predicate::changed
, NULL_TREE
);
2190 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
2191 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
2193 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
2194 && gimple_op (stmt
, 0)
2195 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2196 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
2198 return op_non_const
;
2201 struct record_modified_bb_info
2208 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2209 probability how often it changes between USE_BB.
2210 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2211 is in different loop nest, we can do better.
2212 This is all just estimate. In theory we look for minimal cut separating
2213 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2217 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
2219 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
2220 if (l
&& l
->header
->count
< init_bb
->count
)
2225 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2226 set except for info->stmt. */
2229 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
2231 struct record_modified_bb_info
*info
=
2232 (struct record_modified_bb_info
*) data
;
2233 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
2235 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
2237 bitmap_set_bit (info
->bb_set
,
2238 SSA_NAME_IS_DEFAULT_DEF (vdef
)
2239 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
2241 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2242 gimple_bb (info
->stmt
))->index
);
2245 fprintf (dump_file
, " Param ");
2246 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
2247 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
2248 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
2250 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
2251 gimple_bb (info
->stmt
))->index
);
2252 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2257 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2258 will change since last invocation of STMT.
2260 Value 0 is reserved for compile time invariants.
2261 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2262 ought to be REG_BR_PROB_BASE / estimated_iters. */
2265 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2267 tree op
= gimple_call_arg (stmt
, i
);
2268 basic_block bb
= gimple_bb (stmt
);
2270 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2271 op
= TREE_OPERAND (op
, 0);
2273 tree base
= get_base_address (op
);
2275 /* Global invariants never change. */
2276 if (is_gimple_min_invariant (base
))
2279 /* We would have to do non-trivial analysis to really work out what
2280 is the probability of value to change (i.e. when init statement
2281 is in a sibling loop of the call).
2283 We do an conservative estimate: when call is executed N times more often
2284 than the statement defining value, we take the frequency 1/N. */
2285 if (TREE_CODE (base
) == SSA_NAME
)
2287 profile_count init_count
;
2289 if (!bb
->count
.nonzero_p ())
2290 return REG_BR_PROB_BASE
;
2292 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2293 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2295 init_count
= get_minimal_bb
2296 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2297 gimple_bb (stmt
))->count
;
2299 if (init_count
< bb
->count
)
2300 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2301 * REG_BR_PROB_BASE
).to_int (), 1);
2302 return REG_BR_PROB_BASE
;
2307 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2308 struct record_modified_bb_info info
;
2309 tree init
= ctor_for_folding (base
);
2311 if (init
!= error_mark_node
)
2313 if (!bb
->count
.nonzero_p () || fbi
->aa_walk_budget
== 0)
2314 return REG_BR_PROB_BASE
;
2317 fprintf (dump_file
, " Analyzing param change probability of ");
2318 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2319 fprintf (dump_file
, "\n");
2321 ao_ref_init (&refd
, op
);
2324 info
.bb_set
= BITMAP_ALLOC (NULL
);
2326 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2327 NULL
, NULL
, fbi
->aa_walk_budget
);
2329 fbi
->aa_walk_budget
-= walked
;
2330 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2333 fbi
->aa_walk_budget
= 0;
2337 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2339 fprintf (dump_file
, " Set in same BB as used.\n");
2341 BITMAP_FREE (info
.bb_set
);
2342 return REG_BR_PROB_BASE
;
2347 /* Lookup the most frequent update of the value and believe that
2348 it dominates all the other; precise analysis here is difficult. */
2349 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2350 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2353 fprintf (dump_file
, " Set with count ");
2354 max
.dump (dump_file
);
2355 fprintf (dump_file
, " and used with count ");
2356 bb
->count
.dump (dump_file
);
2357 fprintf (dump_file
, " freq %f\n",
2358 max
.to_sreal_scale (bb
->count
).to_double ());
2361 BITMAP_FREE (info
.bb_set
);
2362 if (max
< bb
->count
)
2363 return MAX ((max
.to_sreal_scale (bb
->count
)
2364 * REG_BR_PROB_BASE
).to_int (), 1);
2365 return REG_BR_PROB_BASE
;
2369 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2370 sub-graph and if the predicate the condition depends on is known. If so,
2371 return true and store the pointer the predicate in *P. */
2374 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2375 ipa_fn_summary
*summary
,
2376 class ipa_node_params
*params_summary
,
2379 vec
<ipa_predicate
> nonconstant_names
)
2383 basic_block first_bb
= NULL
;
2385 if (single_pred_p (bb
))
2391 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2393 if (single_succ_p (e
->src
))
2395 if (!single_pred_p (e
->src
))
2398 first_bb
= single_pred (e
->src
);
2399 else if (single_pred (e
->src
) != first_bb
)
2406 else if (e
->src
!= first_bb
)
2414 gcond
*stmt
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (first_bb
));
2416 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2419 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2420 gimple_cond_lhs (stmt
),
2428 /* Given a PHI statement in a function described by inline properties SUMMARY
2429 and *P being the predicate describing whether the selected PHI argument is
2430 known, store a predicate for the result of the PHI statement into
2431 NONCONSTANT_NAMES, if possible. */
2434 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2436 vec
<ipa_predicate
> nonconstant_names
)
2440 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2442 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2443 if (!is_gimple_min_invariant (arg
))
2445 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2446 *p
= p
->or_with (summary
->conds
,
2447 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2455 fprintf (dump_file
, "\t\tphi predicate: ");
2456 p
->dump (dump_file
, summary
->conds
);
2458 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2461 /* For a typical usage of __builtin_expect (a<b, 1), we
2462 may introduce an extra relation stmt:
2463 With the builtin, we have
2466 t3 = __builtin_expect (t2, 1);
2469 Without the builtin, we have
2472 This affects the size/time estimation and may have
2473 an impact on the earlier inlining.
2474 Here find this pattern and fix it up later. */
2477 find_foldable_builtin_expect (basic_block bb
)
2479 gimple_stmt_iterator bsi
;
2481 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2483 gimple
*stmt
= gsi_stmt (bsi
);
2484 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2485 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2486 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2488 tree var
= gimple_call_lhs (stmt
);
2489 tree arg
= gimple_call_arg (stmt
, 0);
2490 use_operand_p use_p
;
2497 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2499 while (TREE_CODE (arg
) == SSA_NAME
)
2501 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2502 if (!is_gimple_assign (stmt_tmp
))
2504 switch (gimple_assign_rhs_code (stmt_tmp
))
2523 arg
= gimple_assign_rhs1 (stmt_tmp
);
2526 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2527 && gimple_code (use_stmt
) == GIMPLE_COND
)
2534 /* Return true when the basic blocks contains only clobbers followed by RESX.
2535 Such BBs are kept around to make removal of dead stores possible with
2536 presence of EH and will be optimized out by optimize_clobbers later in the
2539 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2540 that can be clobber only, too.. When it is false, the RESX is not necessary
2541 on the end of basic block. */
2544 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2546 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2552 if (gsi_end_p (gsi
))
2554 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2558 else if (!single_succ_p (bb
))
2561 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2563 gimple
*stmt
= gsi_stmt (gsi
);
2564 if (is_gimple_debug (stmt
))
2566 if (gimple_clobber_p (stmt
))
2568 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2573 /* See if all predecessors are either throws or clobber only BBs. */
2574 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2575 if (!(e
->flags
& EDGE_EH
)
2576 && !clobber_only_eh_bb_p (e
->src
, false))
2582 /* Return true if STMT compute a floating point expression that may be affected
2583 by -ffast-math and similar flags. */
2586 fp_expression_p (gimple
*stmt
)
2591 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2592 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2597 /* Return true if T references memory location that is local
2598 for the function (that means, dead after return) or read-only. */
2601 refs_local_or_readonly_memory_p (tree t
)
2603 /* Non-escaping memory is fine. */
2604 t
= get_base_address (t
);
2605 if ((TREE_CODE (t
) == MEM_REF
2606 || TREE_CODE (t
) == TARGET_MEM_REF
))
2607 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2609 /* Automatic variables are fine. */
2611 && auto_var_in_fn_p (t
, current_function_decl
))
2614 /* Read-only variables are fine. */
2615 if (DECL_P (t
) && TREE_READONLY (t
))
2621 /* Return true if T is a pointer pointing to memory location that is local
2622 for the function (that means, dead after return) or read-only. */
2625 points_to_local_or_readonly_memory_p (tree t
)
2627 /* See if memory location is clearly invalid. */
2628 if (integer_zerop (t
))
2629 return flag_delete_null_pointer_checks
;
2630 if (TREE_CODE (t
) == SSA_NAME
)
2632 /* For IPA passes we can consinder accesses to return slot local
2633 even if it is not local in the sense that memory is dead by
2634 the end of founction.
2635 The outer function will see a store in the call assignment
2636 and thus this will do right thing for all uses of this
2637 function in the current IPA passes (modref, pure/const discovery
2638 and inlining heuristics). */
2639 if (DECL_RESULT (current_function_decl
)
2640 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl
))
2641 && t
== ssa_default_def (cfun
, DECL_RESULT (current_function_decl
)))
2643 return !ptr_deref_may_alias_global_p (t
, false);
2645 if (TREE_CODE (t
) == ADDR_EXPR
)
2646 return refs_local_or_readonly_memory_p (TREE_OPERAND (t
, 0));
2650 /* Return true if T is a pointer pointing to memory location that is possible
2651 sra candidate if all functions it is passed to are inlined. */
2654 points_to_possible_sra_candidate_p (tree t
)
2656 if (TREE_CODE (t
) != ADDR_EXPR
)
2659 t
= get_base_address (TREE_OPERAND (t
, 0));
2661 /* Automatic variables are fine. */
2663 && auto_var_in_fn_p (t
, current_function_decl
))
2668 /* Analyze function body for NODE.
2669 EARLY indicates run from early optimization pipeline. */
2672 analyze_function_body (struct cgraph_node
*node
, bool early
)
2674 sreal time
= opt_for_fn (node
->decl
, param_uninlined_function_time
);
2675 /* Estimate static overhead for function prologue/epilogue and alignment. */
2676 int size
= opt_for_fn (node
->decl
, param_uninlined_function_insns
);
2677 /* Benefits are scaled by probability of elimination that is in range
2680 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2682 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2683 ipa_node_params
*params_summary
2684 = early
? NULL
: ipa_node_params_sum
->get (node
);
2685 ipa_predicate bb_predicate
;
2686 struct ipa_func_body_info fbi
;
2687 vec
<ipa_predicate
> nonconstant_names
= vNULL
;
2690 gimple
*fix_builtin_expect_stmt
;
2692 gcc_assert (my_function
&& my_function
->cfg
);
2693 gcc_assert (cfun
== my_function
);
2695 memset(&fbi
, 0, sizeof(fbi
));
2696 vec_free (info
->conds
);
2698 info
->size_time_table
.release ();
2699 info
->call_size_time_table
.release ();
2701 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2702 so we can produce proper inline hints.
2704 When optimizing and analyzing for early inliner, initialize node params
2705 so we can produce correct BB predicates. */
2707 if (opt_for_fn (node
->decl
, optimize
))
2709 calculate_dominance_info (CDI_DOMINATORS
);
2710 calculate_dominance_info (CDI_POST_DOMINATORS
);
2712 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2715 ipa_check_create_node_params ();
2716 ipa_initialize_node_params (node
);
2719 if (ipa_node_params_sum
)
2722 fbi
.info
= ipa_node_params_sum
->get (node
);
2723 fbi
.bb_infos
= vNULL
;
2724 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
), true);
2725 fbi
.param_count
= count_formal_params (node
->decl
);
2726 fbi
.aa_walk_budget
= opt_for_fn (node
->decl
, param_ipa_max_aa_steps
);
2728 nonconstant_names
.safe_grow_cleared
2729 (SSANAMES (my_function
)->length (), true);
2734 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2735 node
->dump_name ());
2737 /* When we run into maximal number of entries, we assign everything to the
2738 constant truth case. Be sure to have it in list. */
2739 bb_predicate
= true;
2740 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2742 bb_predicate
= ipa_predicate::not_inlined ();
2743 info
->account_size_time (opt_for_fn (node
->decl
,
2744 param_uninlined_function_insns
)
2745 * ipa_fn_summary::size_scale
,
2746 opt_for_fn (node
->decl
,
2747 param_uninlined_function_time
),
2751 /* Only look for target information for inlinable functions. */
2752 bool scan_for_target_info
=
2754 && targetm
.target_option
.need_ipa_fn_target_info (node
->decl
,
2758 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2759 const profile_count entry_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2760 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2761 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2762 for (n
= 0; n
< nblocks
; n
++)
2764 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2765 freq
= bb
->count
.to_sreal_scale (entry_count
);
2766 if (clobber_only_eh_bb_p (bb
))
2768 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2769 fprintf (dump_file
, "\n Ignoring BB %i;"
2770 " it will be optimized away by cleanup_clobbers\n",
2775 /* TODO: Obviously predicates can be propagated down across CFG. */
2779 bb_predicate
= *(ipa_predicate
*)bb
->aux
;
2781 bb_predicate
= false;
2784 bb_predicate
= true;
2786 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2788 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2789 bb_predicate
.dump (dump_file
, info
->conds
);
2792 if (fbi
.info
&& nonconstant_names
.exists ())
2794 ipa_predicate phi_predicate
;
2795 bool first_phi
= true;
2797 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2801 && !phi_result_unknown_predicate (&fbi
, info
,
2808 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2810 fprintf (dump_file
, " ");
2811 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2813 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2818 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2820 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2821 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2823 gimple
*stmt
= gsi_stmt (bsi
);
2824 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2825 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2827 ipa_predicate will_be_nonconstant
;
2829 /* This relation stmt should be folded after we remove
2830 __builtin_expect call. Adjust the cost here. */
2831 if (stmt
== fix_builtin_expect_stmt
)
2837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2839 fprintf (dump_file
, " ");
2840 print_gimple_stmt (dump_file
, stmt
, 0);
2841 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2842 freq
.to_double (), this_size
,
2846 if (is_gimple_call (stmt
)
2847 && !gimple_call_internal_p (stmt
))
2849 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2850 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2852 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2853 resolved as constant. We however don't want to optimize
2854 out the cgraph edges. */
2855 if (nonconstant_names
.exists ()
2856 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2857 && gimple_call_lhs (stmt
)
2858 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2860 ipa_predicate false_p
= false;
2861 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2864 if (ipa_node_params_sum
)
2866 int count
= gimple_call_num_args (stmt
);
2870 es
->param
.safe_grow_cleared (count
, true);
2871 for (i
= 0; i
< count
; i
++)
2873 int prob
= param_change_prob (&fbi
, stmt
, i
);
2874 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2875 es
->param
[i
].change_prob
= prob
;
2876 es
->param
[i
].points_to_local_or_readonly_memory
2877 = points_to_local_or_readonly_memory_p
2878 (gimple_call_arg (stmt
, i
));
2879 es
->param
[i
].points_to_possible_sra_candidate
2880 = points_to_possible_sra_candidate_p
2881 (gimple_call_arg (stmt
, i
));
2884 /* We cannot setup VLA parameters during inlining. */
2885 for (unsigned int i
= 0; i
< gimple_call_num_args (stmt
); ++i
)
2886 if (TREE_CODE (gimple_call_arg (stmt
, i
)) == WITH_SIZE_EXPR
)
2888 edge
->inline_failed
= CIF_FUNCTION_NOT_INLINABLE
;
2891 es
->call_stmt_size
= this_size
;
2892 es
->call_stmt_time
= this_time
;
2893 es
->loop_depth
= bb_loop_depth (bb
);
2894 edge_set_predicate (edge
, &bb_predicate
);
2895 if (edge
->speculative
)
2897 cgraph_edge
*indirect
2898 = edge
->speculative_call_indirect_edge ();
2899 ipa_call_summary
*es2
2900 = ipa_call_summaries
->get_create (indirect
);
2901 ipa_call_summaries
->duplicate (edge
, indirect
,
2904 /* Edge is the first direct call.
2905 create and duplicate call summaries for multiple
2906 speculative call targets. */
2907 for (cgraph_edge
*direct
2908 = edge
->next_speculative_call_target ();
2910 direct
= direct
->next_speculative_call_target ())
2912 ipa_call_summary
*es3
2913 = ipa_call_summaries
->get_create (direct
);
2914 ipa_call_summaries
->duplicate (edge
, direct
,
2920 /* TODO: When conditional jump or switch is known to be constant, but
2921 we did not translate it into the predicates, we really can account
2922 just maximum of the possible paths. */
2925 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2926 stmt
, nonconstant_names
);
2928 will_be_nonconstant
= true;
2929 if (this_time
|| this_size
)
2931 sreal final_time
= (sreal
)this_time
* freq
;
2932 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2933 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2935 "\t\t50%% will be eliminated by inlining\n");
2936 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2937 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2939 ipa_predicate p
= bb_predicate
& will_be_nonconstant
;
2940 int parm
= load_or_store_of_ptr_parameter (&fbi
, stmt
);
2941 ipa_predicate sra_predicate
= true;
2943 sra_predicate
&= add_condition (info
, params_summary
, parm
,
2944 ptr_type_node
, NULL
,
2945 ipa_predicate::not_sra_candidate
, NULL
, 0);
2947 /* We can ignore statement when we proved it is never going
2948 to happen, but we cannot do that for call statements
2949 because edges are accounted specially. */
2951 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2957 /* We account everything but the calls. Calls have their own
2958 size/time info attached to cgraph edges. This is necessary
2959 in order to make the cost disappear after inlining. */
2960 if (!is_gimple_call (stmt
))
2965 = bb_predicate
& ipa_predicate::not_inlined () & sra_predicate
;
2966 info
->account_size_time (this_size
* prob
,
2967 (final_time
* prob
) / 2, ip
,
2971 info
->account_size_time (this_size
* (2 - prob
),
2972 (final_time
* (2 - prob
) / 2),
2973 bb_predicate
& sra_predicate
,
2977 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2979 info
->fp_expressions
= true;
2981 fprintf (dump_file
, " fp_expression set\n");
2985 /* For target specific information, we want to scan all statements
2986 rather than those statements with non-zero weights, to avoid
2987 missing to scan something interesting for target information,
2988 such as: internal function calls. */
2989 if (scan_for_target_info
)
2990 scan_for_target_info
=
2991 targetm
.target_option
.update_ipa_fn_target_info
2992 (info
->target_info
, stmt
);
2994 /* Account cost of address calculations in the statements. */
2995 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2997 for (tree op
= gimple_op (stmt
, i
);
2998 op
&& handled_component_p (op
);
2999 op
= TREE_OPERAND (op
, 0))
3000 if ((TREE_CODE (op
) == ARRAY_REF
3001 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
3002 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
3004 ipa_predicate p
= bb_predicate
;
3006 p
= p
& will_be_nonconstant_expr_predicate
3007 (&fbi
, info
, params_summary
,
3008 TREE_OPERAND (op
, 1),
3016 "\t\tAccounting address calculation.\n");
3017 info
->account_size_time (ipa_fn_summary::size_scale
,
3029 if (nonconstant_names
.exists () && !early
)
3031 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3032 unsigned max_loop_predicates
= opt_for_fn (node
->decl
,
3033 param_ipa_max_loop_predicates
);
3035 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3036 flow_loops_dump (dump_file
, NULL
, 0);
3038 for (auto loop
: loops_list (cfun
, 0))
3040 ipa_predicate loop_iterations
= true;
3044 class tree_niter_desc niter_desc
;
3045 if (!loop
->header
->aux
)
3048 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
3049 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
3051 bb_predicate
= *(ipa_predicate
*)loop
->header
->aux
;
3052 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
3053 FOR_EACH_VEC_ELT (exits
, j
, ex
)
3054 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
3055 && !is_gimple_min_invariant (niter_desc
.niter
))
3057 ipa_predicate will_be_nonconstant
3058 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3062 if (will_be_nonconstant
!= true)
3063 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3064 if (will_be_nonconstant
!= true
3065 && will_be_nonconstant
!= false)
3066 loop_iterations
&= will_be_nonconstant
;
3068 add_freqcounting_predicate (&s
->loop_iterations
, loop_iterations
,
3069 phdr_freq
, max_loop_predicates
);
3072 /* To avoid quadratic behavior we analyze stride predicates only
3073 with respect to the containing loop. Thus we simply iterate
3074 over all defs in the outermost loop body. */
3075 for (class loop
*loop
= loops_for_fn (cfun
)->tree_root
->inner
;
3076 loop
!= NULL
; loop
= loop
->next
)
3078 ipa_predicate loop_stride
= true;
3079 basic_block
*body
= get_loop_body (loop
);
3080 profile_count phdr_count
= loop_preheader_edge (loop
)->count ();
3081 sreal phdr_freq
= phdr_count
.to_sreal_scale (entry_count
);
3082 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
3084 gimple_stmt_iterator gsi
;
3088 bb_predicate
= *(ipa_predicate
*)body
[i
]->aux
;
3089 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
3092 gimple
*stmt
= gsi_stmt (gsi
);
3094 if (!is_gimple_assign (stmt
))
3097 tree def
= gimple_assign_lhs (stmt
);
3098 if (TREE_CODE (def
) != SSA_NAME
)
3102 if (!simple_iv (loop_containing_stmt (stmt
),
3103 loop_containing_stmt (stmt
),
3105 || is_gimple_min_invariant (iv
.step
))
3108 ipa_predicate will_be_nonconstant
3109 = will_be_nonconstant_expr_predicate (&fbi
, info
,
3113 if (will_be_nonconstant
!= true)
3114 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
3115 if (will_be_nonconstant
!= true
3116 && will_be_nonconstant
!= false)
3117 loop_stride
= loop_stride
& will_be_nonconstant
;
3120 add_freqcounting_predicate (&s
->loop_strides
, loop_stride
,
3121 phdr_freq
, max_loop_predicates
);
3126 FOR_ALL_BB_FN (bb
, my_function
)
3132 edge_predicate_pool
.remove ((ipa_predicate
*)bb
->aux
);
3134 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3137 edge_predicate_pool
.remove ((ipa_predicate
*)e
->aux
);
3141 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
3142 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
3144 ss
->self_size
= size
;
3145 nonconstant_names
.release ();
3146 ipa_release_body_info (&fbi
);
3147 if (opt_for_fn (node
->decl
, optimize
))
3150 loop_optimizer_finalize ();
3151 else if (!ipa_edge_args_sum
)
3152 ipa_free_all_node_params ();
3153 free_dominance_info (CDI_DOMINATORS
);
3154 free_dominance_info (CDI_POST_DOMINATORS
);
3158 fprintf (dump_file
, "\n");
3159 ipa_dump_fn_summary (dump_file
, node
);
3164 /* Compute function summary.
3165 EARLY is true when we compute parameters during early opts. */
3168 compute_fn_summary (struct cgraph_node
*node
, bool early
)
3170 HOST_WIDE_INT self_stack_size
;
3171 struct cgraph_edge
*e
;
3173 gcc_assert (!node
->inlined_to
);
3175 if (!ipa_fn_summaries
)
3176 ipa_fn_summary_alloc ();
3178 /* Create a new ipa_fn_summary. */
3179 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
3180 ipa_fn_summaries
->remove (node
);
3181 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3182 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3184 /* Estimate the stack size for the function if we're optimizing. */
3185 self_stack_size
= optimize
&& !node
->thunk
3186 ? estimated_stack_frame_size (node
) : 0;
3187 size_info
->estimated_self_stack_size
= self_stack_size
;
3188 info
->estimated_stack_size
= self_stack_size
;
3192 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
3193 ipa_predicate t
= true;
3195 node
->can_change_signature
= false;
3196 es
->call_stmt_size
= eni_size_weights
.call_cost
;
3197 es
->call_stmt_time
= eni_time_weights
.call_cost
;
3198 info
->account_size_time (ipa_fn_summary::size_scale
3199 * opt_for_fn (node
->decl
,
3200 param_uninlined_function_thunk_insns
),
3201 opt_for_fn (node
->decl
,
3202 param_uninlined_function_thunk_time
), t
, t
);
3203 t
= ipa_predicate::not_inlined ();
3204 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
3205 ipa_update_overall_fn_summary (node
);
3206 size_info
->self_size
= size_info
->size
;
3207 if (stdarg_p (TREE_TYPE (node
->decl
)))
3209 info
->inlinable
= false;
3210 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
3213 info
->inlinable
= true;
3217 /* Even is_gimple_min_invariant rely on current_function_decl. */
3218 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3220 /* During IPA profile merging we may be called w/o virtual SSA form
3222 update_ssa (TODO_update_ssa_only_virtuals
);
3224 /* Can this function be inlined at all? */
3225 if (!opt_for_fn (node
->decl
, optimize
)
3226 && !lookup_attribute ("always_inline",
3227 DECL_ATTRIBUTES (node
->decl
)))
3228 info
->inlinable
= false;
3230 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
3232 bool no_signature
= false;
3233 /* Type attributes can use parameter indices to describe them.
3234 Special case fn spec since we can safely preserve them in
3235 modref summaries. */
3236 for (tree list
= TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
));
3237 list
&& !no_signature
; list
= TREE_CHAIN (list
))
3238 if (!ipa_param_adjustments::type_attribute_allowed_p
3239 (get_attribute_name (list
)))
3243 fprintf (dump_file
, "No signature change:"
3244 " function type has unhandled attribute %s.\n",
3245 IDENTIFIER_POINTER (get_attribute_name (list
)));
3247 no_signature
= true;
3249 for (tree parm
= DECL_ARGUMENTS (node
->decl
);
3250 parm
&& !no_signature
; parm
= DECL_CHAIN (parm
))
3251 if (variably_modified_type_p (TREE_TYPE (parm
), node
->decl
))
3255 fprintf (dump_file
, "No signature change:"
3256 " has parameter with variably modified type.\n");
3258 no_signature
= true;
3261 /* Likewise for #pragma omp declare simd functions or functions
3262 with simd attribute. */
3264 || lookup_attribute ("omp declare simd",
3265 DECL_ATTRIBUTES (node
->decl
)))
3266 node
->can_change_signature
= false;
3269 /* Otherwise, inlinable functions always can change signature. */
3270 if (info
->inlinable
)
3271 node
->can_change_signature
= true;
3274 /* Functions calling builtin_apply cannot change signature. */
3275 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3277 tree
cdecl = e
->callee
->decl
;
3278 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
,
3282 node
->can_change_signature
= !e
;
3285 analyze_function_body (node
, early
);
3289 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3290 size_info
->size
= size_info
->self_size
;
3291 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
3293 /* Code above should compute exactly the same result as
3294 ipa_update_overall_fn_summary except for case when speculative
3295 edges are present since these are accounted to size but not
3296 self_size. Do not compare time since different order the roundoff
3297 errors result in slight changes. */
3298 ipa_update_overall_fn_summary (node
);
3301 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3304 gcc_assert (e
|| size_info
->size
== size_info
->self_size
);
3309 /* Compute parameters of functions used by inliner using
3310 current_function_decl. */
3313 compute_fn_summary_for_current (void)
3315 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
3319 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3320 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3321 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3324 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
3325 int *size
, int *time
,
3326 ipa_call_arg_values
*avals
)
3329 struct cgraph_node
*callee
;
3330 class ipa_fn_summary
*isummary
;
3331 enum availability avail
;
3335 || (!avals
->m_known_vals
.length() && !avals
->m_known_contexts
.length ()))
3337 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
3340 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3341 if (!target
|| speculative
)
3344 /* Account for difference in cost between indirect and direct calls. */
3345 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
3346 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
3347 gcc_checking_assert (*time
>= 0);
3348 gcc_checking_assert (*size
>= 0);
3350 callee
= cgraph_node::get (target
);
3351 if (!callee
|| !callee
->definition
)
3353 callee
= callee
->function_symbol (&avail
);
3354 if (avail
< AVAIL_AVAILABLE
)
3356 isummary
= ipa_fn_summaries
->get (callee
);
3357 if (isummary
== NULL
)
3360 return isummary
->inlinable
;
3363 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3364 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3365 devirtualized. AVALS, if non-NULL, describes the context of the call site
3366 as far as values of parameters are concerened. */
3369 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
3370 sreal
*time
, ipa_call_arg_values
*avals
,
3373 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3374 int call_size
= es
->call_stmt_size
;
3375 int call_time
= es
->call_stmt_time
;
3378 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
3379 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
, avals
))
3380 *hints
|= INLINE_HINT_indirect_call
;
3381 cur_size
= call_size
* ipa_fn_summary::size_scale
;
3384 *min_size
+= cur_size
;
3386 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
3390 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3391 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3394 Helper for estimate_calls_size_and_time which does the same but
3395 (in most cases) faster. */
3398 estimate_calls_size_and_time_1 (struct cgraph_node
*node
, int *size
,
3399 int *min_size
, sreal
*time
,
3401 clause_t possible_truths
,
3402 ipa_call_arg_values
*avals
)
3404 struct cgraph_edge
*e
;
3405 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3407 if (!e
->inline_failed
)
3409 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3410 estimate_calls_size_and_time_1 (e
->callee
, size
, min_size
, time
,
3411 hints
, possible_truths
, avals
);
3415 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3417 /* Do not care about zero sized builtins. */
3418 if (!es
->call_stmt_size
)
3420 gcc_checking_assert (!es
->call_stmt_time
);
3424 || es
->predicate
->evaluate (possible_truths
))
3426 /* Predicates of calls shall not use NOT_CHANGED codes,
3427 so we do not need to compute probabilities. */
3428 estimate_edge_size_and_time (e
, size
,
3429 es
->predicate
? NULL
: min_size
,
3430 time
, avals
, hints
);
3433 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3435 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3437 || es
->predicate
->evaluate (possible_truths
))
3438 estimate_edge_size_and_time (e
, size
,
3439 es
->predicate
? NULL
: min_size
,
3440 time
, avals
, hints
);
3444 /* Populate sum->call_size_time_table for edges from NODE. */
3447 summarize_calls_size_and_time (struct cgraph_node
*node
,
3448 ipa_fn_summary
*sum
)
3450 struct cgraph_edge
*e
;
3451 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3453 if (!e
->inline_failed
)
3455 gcc_checking_assert (!ipa_call_summaries
->get (e
));
3456 summarize_calls_size_and_time (e
->callee
, sum
);
3462 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3464 ipa_predicate pred
= true;
3465 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3468 pred
= *es
->predicate
;
3469 sum
->account_size_time (size
, time
, pred
, pred
, true);
3471 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3476 estimate_edge_size_and_time (e
, &size
, NULL
, &time
, NULL
, NULL
);
3477 ipa_predicate pred
= true;
3478 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3481 pred
= *es
->predicate
;
3482 sum
->account_size_time (size
, time
, pred
, pred
, true);
3486 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3487 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3488 context of the call site. */
3491 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
3492 int *min_size
, sreal
*time
,
3494 clause_t possible_truths
,
3495 ipa_call_arg_values
*avals
)
3497 class ipa_fn_summary
*sum
= ipa_fn_summaries
->get (node
);
3498 bool use_table
= true;
3500 gcc_assert (node
->callees
|| node
->indirect_calls
);
3502 /* During early inlining we do not calculate info for very
3503 large functions and thus there is no need for producing
3505 if (!ipa_node_params_sum
)
3507 /* Do not calculate summaries for simple wrappers; it is waste
3509 else if (node
->callees
&& node
->indirect_calls
3510 && node
->callees
->inline_failed
&& !node
->callees
->next_callee
)
3512 /* If there is an indirect edge that may be optimized, we need
3513 to go the slow way. */
3514 else if (avals
&& hints
3515 && (avals
->m_known_vals
.length ()
3516 || avals
->m_known_contexts
.length ()
3517 || avals
->m_known_aggs
.length ()))
3519 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (node
);
3520 unsigned int nargs
= params_summary
3521 ? ipa_get_param_count (params_summary
) : 0;
3523 for (unsigned int i
= 0; i
< nargs
&& use_table
; i
++)
3525 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3526 && (avals
->safe_sval_at (i
)
3527 || (ipa_argagg_value_list (avals
).value_for_index_p (i
))))
3529 else if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3530 && (avals
->m_known_contexts
.length () > i
3531 && !avals
->m_known_contexts
[i
].useless_p ()))
3536 /* Fast path is via the call size time table. */
3539 /* Build summary if it is absent. */
3540 if (!sum
->call_size_time_table
.length ())
3542 ipa_predicate true_pred
= true;
3543 sum
->account_size_time (0, 0, true_pred
, true_pred
, true);
3544 summarize_calls_size_and_time (node
, sum
);
3547 int old_size
= *size
;
3548 sreal old_time
= time
? *time
: 0;
3551 *min_size
+= sum
->call_size_time_table
[0].size
;
3556 /* Walk the table and account sizes and times. */
3557 for (i
= 0; sum
->call_size_time_table
.iterate (i
, &e
);
3559 if (e
->exec_predicate
.evaluate (possible_truths
))
3566 /* Be careful and see if both methods agree. */
3567 if ((flag_checking
|| dump_file
)
3568 /* Do not try to sanity check when we know we lost some
3570 && sum
->call_size_time_table
.length ()
3571 < ipa_fn_summary::max_size_time_table_size
)
3573 estimate_calls_size_and_time_1 (node
, &old_size
, NULL
, &old_time
, NULL
,
3574 possible_truths
, avals
);
3575 gcc_assert (*size
== old_size
);
3576 if (time
&& (*time
- old_time
> 1 || *time
- old_time
< -1)
3578 fprintf (dump_file
, "Time mismatch in call summary %f!=%f\n",
3579 old_time
.to_double (),
3580 time
->to_double ());
3583 /* Slow path by walking all edges. */
3585 estimate_calls_size_and_time_1 (node
, size
, min_size
, time
, hints
,
3586 possible_truths
, avals
);
3589 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3590 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3593 ipa_call_context::ipa_call_context (cgraph_node
*node
, clause_t possible_truths
,
3594 clause_t nonspec_possible_truths
,
3595 vec
<inline_param_summary
>
3596 inline_param_summary
,
3597 ipa_auto_call_arg_values
*arg_values
)
3598 : m_node (node
), m_possible_truths (possible_truths
),
3599 m_nonspec_possible_truths (nonspec_possible_truths
),
3600 m_inline_param_summary (inline_param_summary
),
3601 m_avals (arg_values
)
3605 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3608 ipa_cached_call_context::duplicate_from (const ipa_call_context
&ctx
)
3610 m_node
= ctx
.m_node
;
3611 m_possible_truths
= ctx
.m_possible_truths
;
3612 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3613 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3614 unsigned int nargs
= params_summary
3615 ? ipa_get_param_count (params_summary
) : 0;
3617 m_inline_param_summary
= vNULL
;
3618 /* Copy the info only if there is at least one useful entry. */
3619 if (ctx
.m_inline_param_summary
.exists ())
3621 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3623 for (unsigned int i
= 0; i
< n
; i
++)
3624 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3625 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3627 m_inline_param_summary
3628 = ctx
.m_inline_param_summary
.copy ();
3632 m_avals
.m_known_vals
= vNULL
;
3633 if (ctx
.m_avals
.m_known_vals
.exists ())
3635 unsigned int n
= MIN (ctx
.m_avals
.m_known_vals
.length (), nargs
);
3637 for (unsigned int i
= 0; i
< n
; i
++)
3638 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3639 && ctx
.m_avals
.m_known_vals
[i
])
3641 m_avals
.m_known_vals
= ctx
.m_avals
.m_known_vals
.copy ();
3646 m_avals
.m_known_contexts
= vNULL
;
3647 if (ctx
.m_avals
.m_known_contexts
.exists ())
3649 unsigned int n
= MIN (ctx
.m_avals
.m_known_contexts
.length (), nargs
);
3651 for (unsigned int i
= 0; i
< n
; i
++)
3652 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3653 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3655 m_avals
.m_known_contexts
= ctx
.m_avals
.m_known_contexts
.copy ();
3660 m_avals
.m_known_aggs
= vNULL
;
3661 if (ctx
.m_avals
.m_known_aggs
.exists ())
3663 const ipa_argagg_value_list
avl (&ctx
.m_avals
);
3664 for (unsigned int i
= 0; i
< nargs
; i
++)
3665 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3666 && avl
.value_for_index_p (i
))
3668 m_avals
.m_known_aggs
= ctx
.m_avals
.m_known_aggs
.copy ();
3673 m_avals
.m_known_value_ranges
= vNULL
;
3676 /* Release memory used by known_vals/contexts/aggs vectors. and
3677 inline_param_summary. */
3680 ipa_cached_call_context::release ()
3682 /* See if context is initialized at first place. */
3685 m_avals
.m_known_aggs
.release ();
3686 m_avals
.m_known_vals
.release ();
3687 m_avals
.m_known_contexts
.release ();
3688 m_inline_param_summary
.release ();
3691 /* Return true if CTX describes the same call context as THIS. */
3694 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3696 if (m_node
!= ctx
.m_node
3697 || m_possible_truths
!= ctx
.m_possible_truths
3698 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3701 ipa_node_params
*params_summary
= ipa_node_params_sum
->get (m_node
);
3702 unsigned int nargs
= params_summary
3703 ? ipa_get_param_count (params_summary
) : 0;
3705 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3707 for (unsigned int i
= 0; i
< nargs
; i
++)
3709 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3711 if (i
>= m_inline_param_summary
.length ()
3712 || m_inline_param_summary
[i
].useless_p ())
3714 if (i
< ctx
.m_inline_param_summary
.length ()
3715 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3719 if (i
>= ctx
.m_inline_param_summary
.length ()
3720 || ctx
.m_inline_param_summary
[i
].useless_p ())
3722 if (i
< m_inline_param_summary
.length ()
3723 && !m_inline_param_summary
[i
].useless_p ())
3727 if (!m_inline_param_summary
[i
].equal_to
3728 (ctx
.m_inline_param_summary
[i
]))
3732 if (m_avals
.m_known_vals
.exists () || ctx
.m_avals
.m_known_vals
.exists ())
3734 for (unsigned int i
= 0; i
< nargs
; i
++)
3736 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3738 if (i
>= m_avals
.m_known_vals
.length () || !m_avals
.m_known_vals
[i
])
3740 if (i
< ctx
.m_avals
.m_known_vals
.length ()
3741 && ctx
.m_avals
.m_known_vals
[i
])
3745 if (i
>= ctx
.m_avals
.m_known_vals
.length ()
3746 || !ctx
.m_avals
.m_known_vals
[i
])
3748 if (i
< m_avals
.m_known_vals
.length () && m_avals
.m_known_vals
[i
])
3752 if (m_avals
.m_known_vals
[i
] != ctx
.m_avals
.m_known_vals
[i
])
3756 if (m_avals
.m_known_contexts
.exists ()
3757 || ctx
.m_avals
.m_known_contexts
.exists ())
3759 for (unsigned int i
= 0; i
< nargs
; i
++)
3761 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3763 if (i
>= m_avals
.m_known_contexts
.length ()
3764 || m_avals
.m_known_contexts
[i
].useless_p ())
3766 if (i
< ctx
.m_avals
.m_known_contexts
.length ()
3767 && !ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3771 if (i
>= ctx
.m_avals
.m_known_contexts
.length ()
3772 || ctx
.m_avals
.m_known_contexts
[i
].useless_p ())
3774 if (i
< m_avals
.m_known_contexts
.length ()
3775 && !m_avals
.m_known_contexts
[i
].useless_p ())
3779 if (!m_avals
.m_known_contexts
[i
].equal_to
3780 (ctx
.m_avals
.m_known_contexts
[i
]))
3784 if (m_avals
.m_known_aggs
.exists () || ctx
.m_avals
.m_known_aggs
.exists ())
3786 unsigned i
= 0, j
= 0;
3787 while (i
< m_avals
.m_known_aggs
.length ()
3788 || j
< ctx
.m_avals
.m_known_aggs
.length ())
3790 if (i
>= m_avals
.m_known_aggs
.length ())
3792 int idx2
= ctx
.m_avals
.m_known_aggs
[j
].index
;
3793 if (ipa_is_param_used_by_indirect_call (params_summary
, idx2
))
3798 if (j
>= ctx
.m_avals
.m_known_aggs
.length ())
3800 int idx1
= m_avals
.m_known_aggs
[i
].index
;
3801 if (ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3807 int idx1
= m_avals
.m_known_aggs
[i
].index
;
3808 int idx2
= ctx
.m_avals
.m_known_aggs
[j
].index
;
3811 if (ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3818 if (ipa_is_param_used_by_indirect_call (params_summary
, idx2
))
3823 if (!ipa_is_param_used_by_indirect_call (params_summary
, idx1
))
3830 if ((m_avals
.m_known_aggs
[i
].unit_offset
3831 != ctx
.m_avals
.m_known_aggs
[j
].unit_offset
)
3832 || (m_avals
.m_known_aggs
[i
].by_ref
3833 != ctx
.m_avals
.m_known_aggs
[j
].by_ref
)
3834 || !operand_equal_p (m_avals
.m_known_aggs
[i
].value
,
3835 ctx
.m_avals
.m_known_aggs
[j
].value
))
3844 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3845 this context. Always compute size and min_size. Only compute time and
3846 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3850 ipa_call_context::estimate_size_and_time (ipa_call_estimates
*estimates
,
3851 bool est_times
, bool est_hints
)
3853 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3858 ipa_hints hints
= 0;
3859 sreal loops_with_known_iterations
= 0;
3860 sreal loops_with_known_strides
= 0;
3863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3866 fprintf (dump_file
, " Estimating body: %s\n"
3867 " Known to be false: ", m_node
->dump_name ());
3869 for (i
= ipa_predicate::not_inlined_condition
;
3870 i
< (ipa_predicate::first_dynamic_condition
3871 + (int) vec_safe_length (info
->conds
)); i
++)
3872 if (!(m_possible_truths
& (1 << i
)))
3875 fprintf (dump_file
, ", ");
3877 dump_condition (dump_file
, info
->conds
, i
);
3881 if (m_node
->callees
|| m_node
->indirect_calls
)
3882 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3883 est_times
? &time
: NULL
,
3884 est_hints
? &hints
: NULL
, m_possible_truths
,
3887 sreal nonspecialized_time
= time
;
3889 min_size
+= info
->size_time_table
[0].size
;
3890 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
3892 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3894 /* Because predicates are conservative, it can happen that nonconst is 1
3898 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3900 gcc_checking_assert (e
->time
>= 0);
3901 gcc_checking_assert (time
>= 0);
3903 /* We compute specialized size only because size of nonspecialized
3904 copy is context independent.
3906 The difference between nonspecialized execution and specialized is
3907 that nonspecialized is not going to have optimized out computations
3908 known to be constant in a specialized setting. */
3913 nonspecialized_time
+= e
->time
;
3916 else if (!m_inline_param_summary
.exists ())
3923 int prob
= e
->nonconst_predicate
.probability
3924 (info
->conds
, m_possible_truths
,
3925 m_inline_param_summary
);
3926 gcc_checking_assert (prob
>= 0);
3927 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3928 if (prob
== REG_BR_PROB_BASE
)
3931 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3933 gcc_checking_assert (time
>= 0);
3936 gcc_checking_assert (info
->size_time_table
[0].exec_predicate
== true);
3937 gcc_checking_assert (info
->size_time_table
[0].nonconst_predicate
== true);
3938 gcc_checking_assert (min_size
>= 0);
3939 gcc_checking_assert (size
>= 0);
3940 gcc_checking_assert (time
>= 0);
3941 /* nonspecialized_time should be always bigger than specialized time.
3942 Roundoff issues however may get into the way. */
3943 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3945 /* Roundoff issues may make specialized time bigger than nonspecialized
3946 time. We do not really want that to happen because some heuristics
3947 may get confused by seeing negative speedups. */
3948 if (time
> nonspecialized_time
)
3949 time
= nonspecialized_time
;
3954 hints
|= INLINE_HINT_in_scc
;
3955 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3956 hints
|= INLINE_HINT_declared_inline
;
3957 if (info
->builtin_constant_p_parms
.length ()
3958 && DECL_DECLARED_INLINE_P (m_node
->decl
))
3959 hints
|= INLINE_HINT_builtin_constant_p
;
3961 ipa_freqcounting_predicate
*fcp
;
3962 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
3963 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3965 hints
|= INLINE_HINT_loop_iterations
;
3966 loops_with_known_iterations
+= fcp
->freq
;
3968 estimates
->loops_with_known_iterations
= loops_with_known_iterations
;
3970 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
3971 if (!fcp
->predicate
->evaluate (m_possible_truths
))
3973 hints
|= INLINE_HINT_loop_stride
;
3974 loops_with_known_strides
+= fcp
->freq
;
3976 estimates
->loops_with_known_strides
= loops_with_known_strides
;
3979 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3980 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3982 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3984 fprintf (dump_file
, "\n size:%i", (int) size
);
3986 fprintf (dump_file
, " time:%f nonspec time:%f",
3987 time
.to_double (), nonspecialized_time
.to_double ());
3989 fprintf (dump_file
, " loops with known iterations:%f "
3990 "known strides:%f", loops_with_known_iterations
.to_double (),
3991 loops_with_known_strides
.to_double ());
3992 fprintf (dump_file
, "\n");
3996 estimates
->time
= time
;
3997 estimates
->nonspecialized_time
= nonspecialized_time
;
3999 estimates
->size
= size
;
4000 estimates
->min_size
= min_size
;
4002 estimates
->hints
= hints
;
4007 /* Estimate size and time needed to execute callee of EDGE assuming that
4008 parameters known to be constant at caller of EDGE are propagated.
4009 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
4010 and types for parameters. */
4013 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
4014 ipa_auto_call_arg_values
*avals
,
4015 ipa_call_estimates
*estimates
)
4017 clause_t clause
, nonspec_clause
;
4019 evaluate_conditions_for_known_args (node
, false, avals
, &clause
,
4020 &nonspec_clause
, NULL
);
4021 ipa_call_context
ctx (node
, clause
, nonspec_clause
, vNULL
, avals
);
4022 ctx
.estimate_size_and_time (estimates
);
4025 /* Return stack frame offset where frame of NODE is supposed to start inside
4026 of the function it is inlined to.
4027 Return 0 for functions that are not inlined. */
4030 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
4032 HOST_WIDE_INT offset
= 0;
4033 if (!node
->inlined_to
)
4035 node
= node
->callers
->caller
;
4038 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
4039 if (!node
->inlined_to
)
4041 node
= node
->callers
->caller
;
4046 /* Update summary information of inline clones after inlining.
4047 Compute peak stack usage. */
4050 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
4052 struct cgraph_edge
*e
;
4054 ipa_propagate_frequency (node
);
4055 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4057 if (!e
->inline_failed
)
4058 inline_update_callee_summaries (e
->callee
, depth
);
4060 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
4062 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4063 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
4066 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
4067 INLINED_EDGE has been inlined.
4069 When function A is inlined in B and A calls C with parameter that
4070 changes with probability PROB1 and C is known to be passthrough
4071 of argument if B that change with probability PROB2, the probability
4072 of change is now PROB1*PROB2. */
4075 remap_edge_params (struct cgraph_edge
*inlined_edge
,
4076 struct cgraph_edge
*edge
)
4078 if (ipa_node_params_sum
)
4081 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4084 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4085 class ipa_call_summary
*inlined_es
4086 = ipa_call_summaries
->get (inlined_edge
);
4088 if (es
->param
.length () == 0)
4091 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
4093 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4094 if (jfunc
->type
== IPA_JF_PASS_THROUGH
4095 || jfunc
->type
== IPA_JF_ANCESTOR
)
4097 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
4098 ? ipa_get_jf_pass_through_formal_id (jfunc
)
4099 : ipa_get_jf_ancestor_formal_id (jfunc
);
4100 if (id
< (int) inlined_es
->param
.length ())
4102 int prob1
= es
->param
[i
].change_prob
;
4103 int prob2
= inlined_es
->param
[id
].change_prob
;
4104 int prob
= combine_probabilities (prob1
, prob2
);
4106 if (prob1
&& prob2
&& !prob
)
4109 es
->param
[i
].change_prob
= prob
;
4112 ->param
[id
].points_to_local_or_readonly_memory
)
4113 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4115 ->param
[id
].points_to_possible_sra_candidate
)
4116 es
->param
[i
].points_to_possible_sra_candidate
= true;
4118 if (!es
->param
[i
].points_to_local_or_readonly_memory
4119 && jfunc
->type
== IPA_JF_CONST
4120 && points_to_local_or_readonly_memory_p
4121 (ipa_get_jf_constant (jfunc
)))
4122 es
->param
[i
].points_to_local_or_readonly_memory
= true;
4128 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
4130 Remap predicates of callees of NODE. Rest of arguments match
4133 Also update change probabilities. */
4136 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
4137 struct cgraph_node
*node
,
4138 class ipa_fn_summary
*info
,
4139 class ipa_node_params
*params_summary
,
4140 class ipa_fn_summary
*callee_info
,
4141 const vec
<int> &operand_map
,
4142 const vec
<HOST_WIDE_INT
> &offset_map
,
4143 clause_t possible_truths
,
4144 ipa_predicate
*toplev_predicate
)
4146 struct cgraph_edge
*e
, *next
;
4147 for (e
= node
->callees
; e
; e
= next
)
4150 next
= e
->next_callee
;
4152 if (e
->inline_failed
)
4154 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4155 remap_edge_params (inlined_edge
, e
);
4159 p
= es
->predicate
->remap_after_inlining
4160 (info
, params_summary
,
4161 callee_info
, operand_map
,
4162 offset_map
, possible_truths
,
4164 edge_set_predicate (e
, &p
);
4167 edge_set_predicate (e
, toplev_predicate
);
4170 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
4171 params_summary
, callee_info
,
4172 operand_map
, offset_map
, possible_truths
,
4175 for (e
= node
->indirect_calls
; e
; e
= next
)
4177 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4179 next
= e
->next_callee
;
4181 remap_edge_params (inlined_edge
, e
);
4184 p
= es
->predicate
->remap_after_inlining
4185 (info
, params_summary
,
4186 callee_info
, operand_map
, offset_map
,
4187 possible_truths
, *toplev_predicate
);
4188 edge_set_predicate (e
, &p
);
4191 edge_set_predicate (e
, toplev_predicate
);
4195 /* Run remap_after_inlining on each predicate in V. */
4198 remap_freqcounting_predicate (class ipa_fn_summary
*info
,
4199 class ipa_node_params
*params_summary
,
4200 class ipa_fn_summary
*callee_info
,
4201 vec
<ipa_freqcounting_predicate
, va_gc
> *v
,
4202 const vec
<int> &operand_map
,
4203 const vec
<HOST_WIDE_INT
> &offset_map
,
4204 clause_t possible_truths
,
4205 ipa_predicate
*toplev_predicate
)
4208 ipa_freqcounting_predicate
*fcp
;
4209 for (int i
= 0; vec_safe_iterate (v
, i
, &fcp
); i
++)
4212 = fcp
->predicate
->remap_after_inlining (info
, params_summary
,
4213 callee_info
, operand_map
,
4214 offset_map
, possible_truths
,
4216 if (p
!= false && p
!= true)
4217 *fcp
->predicate
&= p
;
4221 /* We inlined EDGE. Update summary of the function we inlined into. */
4224 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
4226 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
4227 struct cgraph_node
*to
= (edge
->caller
->inlined_to
4228 ? edge
->caller
->inlined_to
: edge
->caller
);
4229 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
4230 clause_t clause
= 0; /* not_inline is known to be false. */
4232 auto_vec
<int, 8> operand_map
;
4233 auto_vec
<HOST_WIDE_INT
, 8> offset_map
;
4235 ipa_predicate toplev_predicate
;
4236 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
4237 ipa_node_params
*params_summary
= (ipa_node_params_sum
4238 ? ipa_node_params_sum
->get (to
) : NULL
);
4241 toplev_predicate
= *es
->predicate
;
4243 toplev_predicate
= true;
4245 info
->fp_expressions
|= callee_info
->fp_expressions
;
4246 info
->target_info
|= callee_info
->target_info
;
4248 if (callee_info
->conds
)
4250 ipa_auto_call_arg_values avals
;
4251 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, &avals
, false);
4253 if (ipa_node_params_sum
&& callee_info
->conds
)
4255 ipa_edge_args
*args
= ipa_edge_args_sum
->get (edge
);
4256 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
4261 operand_map
.safe_grow_cleared (count
, true);
4262 offset_map
.safe_grow_cleared (count
, true);
4264 for (i
= 0; i
< count
; i
++)
4266 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
4269 /* TODO: handle non-NOPs when merging. */
4270 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
4272 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
4273 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
4274 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
4277 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
4279 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
4280 if (offset
>= 0 && offset
< INT_MAX
)
4282 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
4283 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
4285 offset_map
[i
] = offset
;
4288 operand_map
[i
] = map
;
4289 gcc_assert (map
< ipa_get_param_count (params_summary
));
4293 for (i
= 0; callee_info
->builtin_constant_p_parms
.iterate (i
, &ip
); i
++)
4294 if (ip
< count
&& operand_map
[ip
] >= 0)
4295 add_builtin_constant_p_parm (info
, operand_map
[ip
]);
4297 sreal freq
= edge
->sreal_frequency ();
4298 for (i
= 0; callee_info
->size_time_table
.iterate (i
, &e
); i
++)
4301 p
= e
->exec_predicate
.remap_after_inlining
4302 (info
, params_summary
,
4303 callee_info
, operand_map
,
4306 ipa_predicate nonconstp
;
4307 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
4308 (info
, params_summary
,
4309 callee_info
, operand_map
,
4312 if (p
!= false && nonconstp
!= false)
4314 sreal add_time
= ((sreal
)e
->time
* freq
);
4315 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
4317 if (prob
!= REG_BR_PROB_BASE
)
4318 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
4319 if (prob
!= REG_BR_PROB_BASE
4320 && dump_file
&& (dump_flags
& TDF_DETAILS
))
4322 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
4323 (double) prob
/ REG_BR_PROB_BASE
);
4325 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
4328 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
4329 callee_info
, operand_map
,
4330 offset_map
, clause
, &toplev_predicate
);
4331 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4332 info
->loop_iterations
, operand_map
,
4333 offset_map
, clause
, &toplev_predicate
);
4334 remap_freqcounting_predicate (info
, params_summary
, callee_info
,
4335 info
->loop_strides
, operand_map
,
4336 offset_map
, clause
, &toplev_predicate
);
4338 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
4339 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
4341 if (info
->estimated_stack_size
< peak
)
4342 info
->estimated_stack_size
= peak
;
4344 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
4345 if (info
->call_size_time_table
.length ())
4348 sreal edge_time
= 0;
4350 estimate_edge_size_and_time (edge
, &edge_size
, NULL
, &edge_time
, NULL
, 0);
4351 /* Unaccount size and time of the optimized out call. */
4352 info
->account_size_time (-edge_size
, -edge_time
,
4353 es
->predicate
? *es
->predicate
: true,
4354 es
->predicate
? *es
->predicate
: true,
4356 /* Account new calls. */
4357 summarize_calls_size_and_time (edge
->callee
, info
);
4360 /* Free summaries that are not maintained for inline clones/edges. */
4361 ipa_call_summaries
->remove (edge
);
4362 ipa_fn_summaries
->remove (edge
->callee
);
4363 ipa_remove_from_growth_caches (edge
);
4366 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4367 overall size and time. Recompute it.
4368 If RESET is true also recompute call_time_size_table. */
4371 ipa_update_overall_fn_summary (struct cgraph_node
*node
, bool reset
)
4373 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
4374 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
4378 size_info
->size
= 0;
4380 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4382 size_info
->size
+= e
->size
;
4383 info
->time
+= e
->time
;
4385 info
->min_size
= info
->size_time_table
[0].size
;
4387 info
->call_size_time_table
.release ();
4388 if (node
->callees
|| node
->indirect_calls
)
4389 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
4391 ~(clause_t
) (1 << ipa_predicate::false_condition
),
4393 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
4394 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
4398 /* This function performs intraprocedural analysis in NODE that is required to
4399 inline indirect calls. */
4402 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
4404 ipa_analyze_node (node
);
4405 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4407 ipa_print_node_params (dump_file
, node
);
4408 ipa_print_node_jump_functions (dump_file
, node
);
4413 /* Note function body size. */
4416 inline_analyze_function (struct cgraph_node
*node
)
4418 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
4421 fprintf (dump_file
, "\nAnalyzing function: %s\n", node
->dump_name ());
4422 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
)
4423 inline_indirect_intraprocedural_analysis (node
);
4424 compute_fn_summary (node
, false);
4427 struct cgraph_edge
*e
;
4428 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4429 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4430 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4431 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
4438 /* Called when new function is inserted to callgraph late. */
4441 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
4443 inline_analyze_function (node
);
4446 /* Note function body size. */
4449 ipa_fn_summary_generate (void)
4451 struct cgraph_node
*node
;
4453 FOR_EACH_DEFINED_FUNCTION (node
)
4454 if (DECL_STRUCT_FUNCTION (node
->decl
))
4455 node
->versionable
= tree_versionable_function_p (node
->decl
);
4457 ipa_fn_summary_alloc ();
4459 ipa_fn_summaries
->enable_insertion_hook ();
4461 ipa_register_cgraph_hooks ();
4463 FOR_EACH_DEFINED_FUNCTION (node
)
4465 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
4466 || opt_for_fn (node
->decl
, optimize
)))
4467 inline_analyze_function (node
);
4471 /* Write inline summary for edge E to OB. */
4474 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
4477 class ipa_call_summary
*es
= prevails
4478 ? ipa_call_summaries
->get_create (e
) : NULL
;
4482 int size
= streamer_read_uhwi (ib
);
4483 int time
= streamer_read_uhwi (ib
);
4484 int depth
= streamer_read_uhwi (ib
);
4488 es
->call_stmt_size
= size
;
4489 es
->call_stmt_time
= time
;
4490 es
->loop_depth
= depth
;
4493 bitpack_d bp
= streamer_read_bitpack (ib
);
4495 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
4497 bp_unpack_value (&bp
, 1);
4501 edge_set_predicate (e
, &p
);
4502 length
= streamer_read_uhwi (ib
);
4504 && (e
->possibly_call_in_translation_unit_p ()
4505 /* Also stream in jump functions to builtins in hope that they
4506 will get fnspecs. */
4507 || fndecl_built_in_p (e
->callee
->decl
, BUILT_IN_NORMAL
)))
4509 es
->param
.safe_grow_cleared (length
, true);
4510 for (i
= 0; i
< length
; i
++)
4512 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
4513 bitpack_d bp
= streamer_read_bitpack (ib
);
4514 es
->param
[i
].points_to_local_or_readonly_memory
4515 = bp_unpack_value (&bp
, 1);
4516 es
->param
[i
].points_to_possible_sra_candidate
4517 = bp_unpack_value (&bp
, 1);
4522 for (i
= 0; i
< length
; i
++)
4524 streamer_read_uhwi (ib
);
4525 streamer_read_uhwi (ib
);
4531 /* Stream in inline summaries from the section. */
4534 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
4537 const struct lto_function_header
*header
=
4538 (const struct lto_function_header
*) data
;
4539 const int cfg_offset
= sizeof (struct lto_function_header
);
4540 const int main_offset
= cfg_offset
+ header
->cfg_size
;
4541 const int string_offset
= main_offset
+ header
->main_size
;
4542 class data_in
*data_in
;
4543 unsigned int i
, count2
, j
;
4544 unsigned int f_count
;
4546 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
4550 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
4551 header
->string_size
, vNULL
);
4552 f_count
= streamer_read_uhwi (&ib
);
4553 for (i
= 0; i
< f_count
; i
++)
4556 struct cgraph_node
*node
;
4557 class ipa_fn_summary
*info
;
4558 class ipa_node_params
*params_summary
;
4559 class ipa_size_summary
*size_info
;
4560 lto_symtab_encoder_t encoder
;
4561 struct bitpack_d bp
;
4562 struct cgraph_edge
*e
;
4565 index
= streamer_read_uhwi (&ib
);
4566 encoder
= file_data
->symtab_node_encoder
;
4567 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
4569 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
4570 params_summary
= node
->prevailing_p ()
4571 ? ipa_node_params_sum
->get (node
) : NULL
;
4572 size_info
= node
->prevailing_p ()
4573 ? ipa_size_summaries
->get_create (node
) : NULL
;
4575 int stack_size
= streamer_read_uhwi (&ib
);
4576 int size
= streamer_read_uhwi (&ib
);
4577 sreal time
= sreal::stream_in (&ib
);
4581 info
->estimated_stack_size
4582 = size_info
->estimated_self_stack_size
= stack_size
;
4583 size_info
->size
= size_info
->self_size
= size
;
4587 bp
= streamer_read_bitpack (&ib
);
4590 info
->inlinable
= bp_unpack_value (&bp
, 1);
4591 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
4592 if (!lto_stream_offload_p
)
4593 info
->target_info
= streamer_read_uhwi (&ib
);
4597 bp_unpack_value (&bp
, 1);
4598 bp_unpack_value (&bp
, 1);
4599 if (!lto_stream_offload_p
)
4600 streamer_read_uhwi (&ib
);
4603 count2
= streamer_read_uhwi (&ib
);
4604 gcc_assert (!info
|| !info
->conds
);
4606 vec_safe_reserve_exact (info
->conds
, count2
);
4607 for (j
= 0; j
< count2
; j
++)
4610 unsigned int k
, count3
;
4611 c
.operand_num
= streamer_read_uhwi (&ib
);
4612 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4613 c
.type
= stream_read_tree (&ib
, data_in
);
4614 c
.val
= stream_read_tree (&ib
, data_in
);
4615 bp
= streamer_read_bitpack (&ib
);
4616 c
.agg_contents
= bp_unpack_value (&bp
, 1);
4617 c
.by_ref
= bp_unpack_value (&bp
, 1);
4619 c
.offset
= streamer_read_uhwi (&ib
);
4620 count3
= streamer_read_uhwi (&ib
);
4623 vec_safe_reserve_exact (c
.param_ops
, count3
);
4625 ipa_set_param_used_by_ipa_predicates
4626 (params_summary
, c
.operand_num
, true);
4627 for (k
= 0; k
< count3
; k
++)
4629 struct expr_eval_op op
;
4630 enum gimple_rhs_class rhs_class
;
4631 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
4632 op
.type
= stream_read_tree (&ib
, data_in
);
4633 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
4635 case GIMPLE_UNARY_RHS
:
4637 op
.val
[0] = NULL_TREE
;
4638 op
.val
[1] = NULL_TREE
;
4641 case GIMPLE_BINARY_RHS
:
4642 case GIMPLE_TERNARY_RHS
:
4643 bp
= streamer_read_bitpack (&ib
);
4644 op
.index
= bp_unpack_value (&bp
, 2);
4645 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4646 if (rhs_class
== GIMPLE_BINARY_RHS
)
4647 op
.val
[1] = NULL_TREE
;
4649 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4653 fatal_error (UNKNOWN_LOCATION
,
4654 "invalid fnsummary in LTO stream");
4657 c
.param_ops
->quick_push (op
);
4660 info
->conds
->quick_push (c
);
4662 count2
= streamer_read_uhwi (&ib
);
4663 gcc_assert (!info
|| !info
->size_time_table
.length ());
4665 info
->size_time_table
.reserve_exact (count2
);
4666 for (j
= 0; j
< count2
; j
++)
4668 class size_time_entry e
;
4670 e
.size
= streamer_read_uhwi (&ib
);
4671 e
.time
= sreal::stream_in (&ib
);
4672 e
.exec_predicate
.stream_in (&ib
);
4673 e
.nonconst_predicate
.stream_in (&ib
);
4676 info
->size_time_table
.quick_push (e
);
4679 count2
= streamer_read_uhwi (&ib
);
4680 for (j
= 0; j
< count2
; j
++)
4683 sreal fcp_freq
= sreal::stream_in (&ib
);
4686 ipa_freqcounting_predicate fcp
;
4687 fcp
.predicate
= NULL
;
4688 set_hint_predicate (&fcp
.predicate
, p
);
4689 fcp
.freq
= fcp_freq
;
4690 vec_safe_push (info
->loop_iterations
, fcp
);
4693 count2
= streamer_read_uhwi (&ib
);
4694 for (j
= 0; j
< count2
; j
++)
4697 sreal fcp_freq
= sreal::stream_in (&ib
);
4700 ipa_freqcounting_predicate fcp
;
4701 fcp
.predicate
= NULL
;
4702 set_hint_predicate (&fcp
.predicate
, p
);
4703 fcp
.freq
= fcp_freq
;
4704 vec_safe_push (info
->loop_strides
, fcp
);
4707 count2
= streamer_read_uhwi (&ib
);
4709 info
->builtin_constant_p_parms
.reserve_exact (count2
);
4710 for (j
= 0; j
< count2
; j
++)
4712 int parm
= streamer_read_uhwi (&ib
);
4714 info
->builtin_constant_p_parms
.quick_push (parm
);
4716 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4717 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4718 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4719 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4722 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4724 lto_data_in_delete (data_in
);
4728 /* Read inline summary. Jump functions are shared among ipa-cp
4729 and inliner, so when ipa-cp is active, we don't need to write them
4733 ipa_fn_summary_read (void)
4735 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4736 struct lto_file_decl_data
*file_data
;
4739 ipa_prop_read_jump_functions ();
4740 ipa_fn_summary_alloc ();
4742 while ((file_data
= file_data_vec
[j
++]))
4746 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4749 inline_read_section (file_data
, data
, len
);
4751 /* Fatal error here. We do not want to support compiling ltrans units
4752 with different version of compiler or different flags than the WPA
4753 unit, so this should never happen. */
4754 fatal_error (input_location
,
4755 "ipa inline summary is missing in input file");
4757 ipa_register_cgraph_hooks ();
4759 gcc_assert (ipa_fn_summaries
);
4760 ipa_fn_summaries
->enable_insertion_hook ();
4764 /* Write inline summary for edge E to OB. */
4767 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4769 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4772 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4773 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4774 streamer_write_uhwi (ob
, es
->loop_depth
);
4776 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4777 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4778 streamer_write_bitpack (&bp
);
4781 es
->predicate
->stream_out (ob
);
4783 streamer_write_uhwi (ob
, 0);
4784 streamer_write_uhwi (ob
, es
->param
.length ());
4785 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4787 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4788 bp
= bitpack_create (ob
->main_stream
);
4789 bp_pack_value (&bp
, es
->param
[i
].points_to_local_or_readonly_memory
, 1);
4790 bp_pack_value (&bp
, es
->param
[i
].points_to_possible_sra_candidate
, 1);
4791 streamer_write_bitpack (&bp
);
4796 /* Write inline summary for node in SET.
4797 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4798 active, we don't need to write them twice. */
4801 ipa_fn_summary_write (void)
4803 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4804 lto_symtab_encoder_iterator lsei
;
4805 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4806 unsigned int count
= 0;
4808 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4809 lsei_next_function_in_partition (&lsei
))
4811 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4812 if (cnode
->definition
&& !cnode
->alias
)
4815 streamer_write_uhwi (ob
, count
);
4817 for (lsei
= lsei_start_function_in_partition (encoder
); !lsei_end_p (lsei
);
4818 lsei_next_function_in_partition (&lsei
))
4820 cgraph_node
*cnode
= lsei_cgraph_node (lsei
);
4821 if (cnode
->definition
&& !cnode
->alias
)
4823 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4824 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4825 struct bitpack_d bp
;
4826 struct cgraph_edge
*edge
;
4829 struct condition
*c
;
4831 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4832 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4833 streamer_write_hwi (ob
, size_info
->self_size
);
4834 info
->time
.stream_out (ob
);
4835 bp
= bitpack_create (ob
->main_stream
);
4836 bp_pack_value (&bp
, info
->inlinable
, 1);
4837 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4838 streamer_write_bitpack (&bp
);
4839 if (!lto_stream_offload_p
)
4840 streamer_write_uhwi (ob
, info
->target_info
);
4841 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4842 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4845 struct expr_eval_op
*op
;
4847 streamer_write_uhwi (ob
, c
->operand_num
);
4848 streamer_write_uhwi (ob
, c
->code
);
4849 stream_write_tree (ob
, c
->type
, true);
4850 stream_write_tree (ob
, c
->val
, true);
4851 bp
= bitpack_create (ob
->main_stream
);
4852 bp_pack_value (&bp
, c
->agg_contents
, 1);
4853 bp_pack_value (&bp
, c
->by_ref
, 1);
4854 streamer_write_bitpack (&bp
);
4855 if (c
->agg_contents
)
4856 streamer_write_uhwi (ob
, c
->offset
);
4857 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4858 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4860 streamer_write_uhwi (ob
, op
->code
);
4861 stream_write_tree (ob
, op
->type
, true);
4864 bp
= bitpack_create (ob
->main_stream
);
4865 bp_pack_value (&bp
, op
->index
, 2);
4866 streamer_write_bitpack (&bp
);
4867 stream_write_tree (ob
, op
->val
[0], true);
4869 stream_write_tree (ob
, op
->val
[1], true);
4873 streamer_write_uhwi (ob
, info
->size_time_table
.length ());
4874 for (i
= 0; info
->size_time_table
.iterate (i
, &e
); i
++)
4876 streamer_write_uhwi (ob
, e
->size
);
4877 e
->time
.stream_out (ob
);
4878 e
->exec_predicate
.stream_out (ob
);
4879 e
->nonconst_predicate
.stream_out (ob
);
4881 ipa_freqcounting_predicate
*fcp
;
4882 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_iterations
));
4883 for (i
= 0; vec_safe_iterate (info
->loop_iterations
, i
, &fcp
); i
++)
4885 fcp
->predicate
->stream_out (ob
);
4886 fcp
->freq
.stream_out (ob
);
4888 streamer_write_uhwi (ob
, vec_safe_length (info
->loop_strides
));
4889 for (i
= 0; vec_safe_iterate (info
->loop_strides
, i
, &fcp
); i
++)
4891 fcp
->predicate
->stream_out (ob
);
4892 fcp
->freq
.stream_out (ob
);
4894 streamer_write_uhwi (ob
, info
->builtin_constant_p_parms
.length ());
4896 for (i
= 0; info
->builtin_constant_p_parms
.iterate (i
, &ip
);
4898 streamer_write_uhwi (ob
, ip
);
4899 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4900 write_ipa_call_summary (ob
, edge
);
4901 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4902 write_ipa_call_summary (ob
, edge
);
4905 streamer_write_char_stream (ob
->main_stream
, 0);
4906 produce_asm (ob
, NULL
);
4907 destroy_output_block (ob
);
4909 ipa_prop_write_jump_functions ();
4913 /* Release function summary. */
4916 ipa_free_fn_summary (void)
4918 if (!ipa_call_summaries
)
4920 ggc_delete (ipa_fn_summaries
);
4921 ipa_fn_summaries
= NULL
;
4922 delete ipa_call_summaries
;
4923 ipa_call_summaries
= NULL
;
4924 edge_predicate_pool
.release ();
4925 /* During IPA this is one of largest datastructures to release. */
4930 /* Release function summary. */
4933 ipa_free_size_summary (void)
4935 if (!ipa_size_summaries
)
4937 delete ipa_size_summaries
;
4938 ipa_size_summaries
= NULL
;
4943 const pass_data pass_data_local_fn_summary
=
4945 GIMPLE_PASS
, /* type */
4946 "local-fnsummary", /* name */
4947 OPTGROUP_INLINE
, /* optinfo_flags */
4948 TV_INLINE_PARAMETERS
, /* tv_id */
4949 0, /* properties_required */
4950 0, /* properties_provided */
4951 0, /* properties_destroyed */
4952 0, /* todo_flags_start */
4953 0, /* todo_flags_finish */
4956 class pass_local_fn_summary
: public gimple_opt_pass
4959 pass_local_fn_summary (gcc::context
*ctxt
)
4960 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4963 /* opt_pass methods: */
4964 opt_pass
* clone () final override
4966 return new pass_local_fn_summary (m_ctxt
);
4968 unsigned int execute (function
*) final override
4970 return compute_fn_summary_for_current ();
4973 }; // class pass_local_fn_summary
4978 make_pass_local_fn_summary (gcc::context
*ctxt
)
4980 return new pass_local_fn_summary (ctxt
);
4984 /* Free inline summary. */
4988 const pass_data pass_data_ipa_free_fn_summary
=
4990 SIMPLE_IPA_PASS
, /* type */
4991 "free-fnsummary", /* name */
4992 OPTGROUP_NONE
, /* optinfo_flags */
4993 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4994 0, /* properties_required */
4995 0, /* properties_provided */
4996 0, /* properties_destroyed */
4997 0, /* todo_flags_start */
4998 0, /* todo_flags_finish */
5001 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
5004 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
5005 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
5009 /* opt_pass methods: */
5010 opt_pass
*clone () final override
5012 return new pass_ipa_free_fn_summary (m_ctxt
);
5014 void set_pass_param (unsigned int n
, bool param
) final override
5016 gcc_assert (n
== 0);
5019 bool gate (function
*) final override
{ return true; }
5020 unsigned int execute (function
*) final override
5022 ipa_free_fn_summary ();
5023 /* Free ipa-prop structures if they are no longer needed. */
5024 ipa_free_all_structures_after_iinln ();
5026 ipa_free_size_summary ();
5032 }; // class pass_ipa_free_fn_summary
5036 simple_ipa_opt_pass
*
5037 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
5039 return new pass_ipa_free_fn_summary (ctxt
);
5044 const pass_data pass_data_ipa_fn_summary
=
5046 IPA_PASS
, /* type */
5047 "fnsummary", /* name */
5048 OPTGROUP_INLINE
, /* optinfo_flags */
5049 TV_IPA_FNSUMMARY
, /* tv_id */
5050 0, /* properties_required */
5051 0, /* properties_provided */
5052 0, /* properties_destroyed */
5053 0, /* todo_flags_start */
5054 ( TODO_dump_symtab
), /* todo_flags_finish */
5057 class pass_ipa_fn_summary
: public ipa_opt_pass_d
5060 pass_ipa_fn_summary (gcc::context
*ctxt
)
5061 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
5062 ipa_fn_summary_generate
, /* generate_summary */
5063 ipa_fn_summary_write
, /* write_summary */
5064 ipa_fn_summary_read
, /* read_summary */
5065 NULL
, /* write_optimization_summary */
5066 NULL
, /* read_optimization_summary */
5067 NULL
, /* stmt_fixup */
5068 0, /* function_transform_todo_flags_start */
5069 NULL
, /* function_transform */
5070 NULL
) /* variable_transform */
5073 /* opt_pass methods: */
5074 unsigned int execute (function
*) final override
{ return 0; }
5076 }; // class pass_ipa_fn_summary
5081 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
5083 return new pass_ipa_fn_summary (ctxt
);
5086 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
5087 within the same process. For use by toplev::finalize. */
5090 ipa_fnsummary_cc_finalize (void)
5092 ipa_free_fn_summary ();