1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
88 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
89 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
91 /* Edge predicates goes here. */
92 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
97 ipa_dump_hints (FILE *f
, ipa_hints hints
)
101 fprintf (f
, "IPA hints:");
102 if (hints
& INLINE_HINT_indirect_call
)
104 hints
&= ~INLINE_HINT_indirect_call
;
105 fprintf (f
, " indirect_call");
107 if (hints
& INLINE_HINT_loop_iterations
)
109 hints
&= ~INLINE_HINT_loop_iterations
;
110 fprintf (f
, " loop_iterations");
112 if (hints
& INLINE_HINT_loop_stride
)
114 hints
&= ~INLINE_HINT_loop_stride
;
115 fprintf (f
, " loop_stride");
117 if (hints
& INLINE_HINT_same_scc
)
119 hints
&= ~INLINE_HINT_same_scc
;
120 fprintf (f
, " same_scc");
122 if (hints
& INLINE_HINT_in_scc
)
124 hints
&= ~INLINE_HINT_in_scc
;
125 fprintf (f
, " in_scc");
127 if (hints
& INLINE_HINT_cross_module
)
129 hints
&= ~INLINE_HINT_cross_module
;
130 fprintf (f
, " cross_module");
132 if (hints
& INLINE_HINT_declared_inline
)
134 hints
&= ~INLINE_HINT_declared_inline
;
135 fprintf (f
, " declared_inline");
137 if (hints
& INLINE_HINT_array_index
)
139 hints
&= ~INLINE_HINT_array_index
;
140 fprintf (f
, " array_index");
142 if (hints
& INLINE_HINT_known_hot
)
144 hints
&= ~INLINE_HINT_known_hot
;
145 fprintf (f
, " known_hot");
151 /* Record SIZE and TIME to SUMMARY.
152 The accounted code will be executed when EXEC_PRED is true.
153 When NONCONST_PRED is false the code will evaulate to constant and
154 will get optimized out in specialized clones of the function. */
157 ipa_fn_summary::account_size_time (int size
, sreal time
,
158 const predicate
&exec_pred
,
159 const predicate
&nonconst_pred_in
)
164 predicate nonconst_pred
;
166 if (exec_pred
== false)
169 nonconst_pred
= nonconst_pred_in
& exec_pred
;
171 if (nonconst_pred
== false)
174 /* We need to create initial empty unconitional clause, but otherwie
175 we don't need to account empty times and sizes. */
176 if (!size
&& time
== 0 && size_time_table
)
179 gcc_assert (time
>= 0);
181 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
182 if (e
->exec_predicate
== exec_pred
183 && e
->nonconst_predicate
== nonconst_pred
)
192 e
= &(*size_time_table
)[0];
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
198 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size
) / ipa_fn_summary::size_scale
,
203 (time
.to_double ()), found
? "" : "new ");
204 exec_pred
.dump (dump_file
, conds
, 0);
205 if (exec_pred
!= nonconst_pred
)
207 fprintf (dump_file
, " nonconst:");
208 nonconst_pred
.dump (dump_file
, conds
);
211 fprintf (dump_file
, "\n");
215 struct size_time_entry new_entry
;
216 new_entry
.size
= size
;
217 new_entry
.time
= time
;
218 new_entry
.exec_predicate
= exec_pred
;
219 new_entry
.nonconst_predicate
= nonconst_pred
;
220 vec_safe_push (size_time_table
, new_entry
);
229 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
231 static struct cgraph_edge
*
232 redirect_to_unreachable (struct cgraph_edge
*e
)
234 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
235 struct cgraph_node
*target
= cgraph_node::get_create
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
239 e
= e
->resolve_speculation (target
->decl
);
241 e
->make_direct (target
);
243 e
->redirect_callee (target
);
244 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
245 e
->inline_failed
= CIF_UNREACHABLE
;
246 e
->count
= profile_count::zero ();
247 es
->call_stmt_size
= 0;
248 es
->call_stmt_time
= 0;
250 callee
->remove_symbol_and_inline_clones ();
254 /* Set predicate for edge E. */
257 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
262 if (predicate
&& *predicate
== false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e
->speculative
|| e
->callee
))
267 e
= redirect_to_unreachable (e
);
269 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
270 if (predicate
&& *predicate
!= true)
273 es
->predicate
= edge_predicate_pool
.allocate ();
274 *es
->predicate
= *predicate
;
279 edge_predicate_pool
.remove (es
->predicate
);
280 es
->predicate
= NULL
;
284 /* Set predicate for hint *P. */
287 set_hint_predicate (predicate
**p
, predicate new_predicate
)
289 if (new_predicate
== false || new_predicate
== true)
292 edge_predicate_pool
.remove (*p
);
298 *p
= edge_predicate_pool
.allocate ();
304 /* Compute what conditions may or may not hold given invormation about
305 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
306 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
307 copy when called in a given context. It is a bitmask of conditions. Bit
308 0 means that condition is known to be false, while bit 1 means that condition
309 may or may not be true. These differs - for example NOT_INLINED condition
310 is always false in the second and also builtin_constant_p tests cannot use
311 the fact that parameter is indeed a constant.
313 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
314 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
315 Return clause of possible truths. When INLINE_P is true, assume that we are
318 ERROR_MARK means compile time invariant. */
321 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
323 vec
<tree
> known_vals
,
324 vec
<ipa_agg_jump_function_p
>
326 clause_t
*ret_clause
,
327 clause_t
*ret_nonspec_clause
)
329 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
330 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
331 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
335 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
340 /* We allow call stmt to have fewer arguments than the callee function
341 (especially for K&R style programs). So bound check here (we assume
342 known_aggs vector, if non-NULL, has the same length as
344 gcc_checking_assert (!known_aggs
.exists ()
345 || (known_vals
.length () == known_aggs
.length ()));
346 if (c
->operand_num
>= (int) known_vals
.length ())
348 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
349 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
355 struct ipa_agg_jump_function
*agg
;
357 if (c
->code
== predicate::changed
359 && (known_vals
[c
->operand_num
] == error_mark_node
))
362 if (known_aggs
.exists ())
364 agg
= known_aggs
[c
->operand_num
];
365 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
366 c
->offset
, c
->by_ref
);
373 val
= known_vals
[c
->operand_num
];
374 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
380 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
381 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
384 if (c
->code
== predicate::changed
)
386 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val
))) != c
->size
)
392 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
393 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
396 if (c
->code
== predicate::is_not_constant
)
398 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
402 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (c
->val
), val
);
404 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
407 if (res
&& integer_zerop (res
))
410 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
411 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
413 *ret_clause
= clause
;
414 if (ret_nonspec_clause
)
415 *ret_nonspec_clause
= nonspec_clause
;
419 /* Work out what conditions might be true at invocation of E. */
422 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
423 clause_t
*clause_ptr
,
424 clause_t
*nonspec_clause_ptr
,
425 vec
<tree
> *known_vals_ptr
,
426 vec
<ipa_polymorphic_call_context
>
428 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
430 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
431 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
432 vec
<tree
> known_vals
= vNULL
;
433 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
436 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
438 known_vals_ptr
->create (0);
439 if (known_contexts_ptr
)
440 known_contexts_ptr
->create (0);
442 if (ipa_node_params_sum
443 && !e
->call_stmt_cannot_inline_p
444 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
))
446 struct ipa_node_params
*caller_parms_info
, *callee_pi
;
447 struct ipa_edge_args
*args
= IPA_EDGE_REF (e
);
448 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
449 int i
, count
= ipa_get_cs_argument_count (args
);
451 if (e
->caller
->global
.inlined_to
)
452 caller_parms_info
= IPA_NODE_REF (e
->caller
->global
.inlined_to
);
454 caller_parms_info
= IPA_NODE_REF (e
->caller
);
455 callee_pi
= IPA_NODE_REF (e
->callee
);
457 if (count
&& (info
->conds
|| known_vals_ptr
))
458 known_vals
.safe_grow_cleared (count
);
459 if (count
&& (info
->conds
|| known_aggs_ptr
))
460 known_aggs
.safe_grow_cleared (count
);
461 if (count
&& known_contexts_ptr
)
462 known_contexts_ptr
->safe_grow_cleared (count
);
464 for (i
= 0; i
< count
; i
++)
466 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
467 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
468 ipa_get_type (callee_pi
, i
));
470 if (!cst
&& e
->call_stmt
471 && i
< (int)gimple_call_num_args (e
->call_stmt
))
473 cst
= gimple_call_arg (e
->call_stmt
, i
);
474 if (!is_gimple_min_invariant (cst
))
479 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
480 if (known_vals
.exists ())
483 else if (inline_p
&& !es
->param
[i
].change_prob
)
484 known_vals
[i
] = error_mark_node
;
486 if (known_contexts_ptr
)
487 (*known_contexts_ptr
)[i
]
488 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs
[i
] = &jf
->agg
;
495 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
496 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
498 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
500 if (count
&& (info
->conds
|| known_vals_ptr
))
501 known_vals
.safe_grow_cleared (count
);
502 for (i
= 0; i
< count
; i
++)
504 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
505 if (!is_gimple_min_invariant (cst
))
512 evaluate_conditions_for_known_args (callee
, inline_p
,
513 known_vals
, known_aggs
, clause_ptr
,
517 *known_vals_ptr
= known_vals
;
519 known_vals
.release ();
522 *known_aggs_ptr
= known_aggs
;
524 known_aggs
.release ();
528 /* Allocate the function summary. */
531 ipa_fn_summary_alloc (void)
533 gcc_checking_assert (!ipa_fn_summaries
);
534 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
535 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
538 ipa_call_summary::~ipa_call_summary ()
541 edge_predicate_pool
.remove (predicate
);
546 ipa_fn_summary::~ipa_fn_summary ()
549 edge_predicate_pool
.remove (loop_iterations
);
551 edge_predicate_pool
.remove (loop_stride
);
553 edge_predicate_pool
.remove (array_index
);
555 vec_free (size_time_table
);
559 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
562 for (e
= node
->callees
; e
; e
= e
->next_callee
)
563 ipa_call_summaries
->remove (e
);
564 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
565 ipa_call_summaries
->remove (e
);
568 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
569 Additionally care about allocating new memory slot for updated predicate
570 and set it to NULL when it becomes true or false (and thus uninteresting).
574 remap_hint_predicate_after_duplication (predicate
**p
,
575 clause_t possible_truths
)
577 predicate new_predicate
;
582 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
583 /* We do not want to free previous predicate; it is used by node origin. */
585 set_hint_predicate (p
, new_predicate
);
589 /* Hook that is called by cgraph.c when a node is duplicated. */
591 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
594 ipa_fn_summary
*info
)
596 new (info
) ipa_fn_summary (*ipa_fn_summaries
->get (src
));
597 /* TODO: as an optimization, we may avoid copying conditions
598 that are known to be false or true. */
599 info
->conds
= vec_safe_copy (info
->conds
);
601 /* When there are any replacements in the function body, see if we can figure
602 out that something was optimized out. */
603 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
605 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
606 /* Use SRC parm info since it may not be copied yet. */
607 struct ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
608 vec
<tree
> known_vals
= vNULL
;
609 int count
= ipa_get_param_count (parms_info
);
611 clause_t possible_truths
;
612 predicate true_pred
= true;
614 int optimized_out_size
= 0;
615 bool inlined_to_p
= false;
616 struct cgraph_edge
*edge
, *next
;
618 info
->size_time_table
= 0;
619 known_vals
.safe_grow_cleared (count
);
620 for (i
= 0; i
< count
; i
++)
622 struct ipa_replace_map
*r
;
624 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
626 if (((!r
->old_tree
&& r
->parm_num
== i
)
627 || (r
->old_tree
&& r
->old_tree
== ipa_get_param (parms_info
, i
)))
628 && r
->replace_p
&& !r
->ref_p
)
630 known_vals
[i
] = r
->new_tree
;
635 evaluate_conditions_for_known_args (dst
, false,
639 /* We are going to specialize,
640 so ignore nonspec truths. */
642 known_vals
.release ();
644 info
->account_size_time (0, 0, true_pred
, true_pred
);
646 /* Remap size_time vectors.
647 Simplify the predicate by prunning out alternatives that are known
649 TODO: as on optimization, we can also eliminate conditions known
651 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
653 predicate new_exec_pred
;
654 predicate new_nonconst_pred
;
655 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
657 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
659 if (new_exec_pred
== false || new_nonconst_pred
== false)
660 optimized_out_size
+= e
->size
;
662 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
666 /* Remap edge predicates with the same simplification as above.
667 Also copy constantness arrays. */
668 for (edge
= dst
->callees
; edge
; edge
= next
)
670 predicate new_predicate
;
671 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
672 next
= edge
->next_callee
;
674 if (!edge
->inline_failed
)
678 new_predicate
= es
->predicate
->remap_after_duplication
680 if (new_predicate
== false && *es
->predicate
!= false)
681 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
682 edge_set_predicate (edge
, &new_predicate
);
685 /* Remap indirect edge predicates with the same simplificaiton as above.
686 Also copy constantness arrays. */
687 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
689 predicate new_predicate
;
690 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
691 next
= edge
->next_callee
;
693 gcc_checking_assert (edge
->inline_failed
);
696 new_predicate
= es
->predicate
->remap_after_duplication
698 if (new_predicate
== false && *es
->predicate
!= false)
699 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
700 edge_set_predicate (edge
, &new_predicate
);
702 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
704 remap_hint_predicate_after_duplication (&info
->loop_stride
,
706 remap_hint_predicate_after_duplication (&info
->array_index
,
709 /* If inliner or someone after inliner will ever start producing
710 non-trivial clones, we will get trouble with lack of information
711 about updating self sizes, because size vectors already contains
712 sizes of the calees. */
713 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
717 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
718 if (info
->loop_iterations
)
720 predicate p
= *info
->loop_iterations
;
721 info
->loop_iterations
= NULL
;
722 set_hint_predicate (&info
->loop_iterations
, p
);
724 if (info
->loop_stride
)
726 predicate p
= *info
->loop_stride
;
727 info
->loop_stride
= NULL
;
728 set_hint_predicate (&info
->loop_stride
, p
);
730 if (info
->array_index
)
732 predicate p
= *info
->array_index
;
733 info
->array_index
= NULL
;
734 set_hint_predicate (&info
->array_index
, p
);
737 if (!dst
->global
.inlined_to
)
738 ipa_update_overall_fn_summary (dst
);
742 /* Hook that is called by cgraph.c when a node is duplicated. */
745 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
746 struct cgraph_edge
*dst
,
747 struct ipa_call_summary
*srcinfo
,
748 struct ipa_call_summary
*info
)
750 new (info
) ipa_call_summary (*srcinfo
);
751 info
->predicate
= NULL
;
752 edge_set_predicate (dst
, srcinfo
->predicate
);
753 info
->param
= srcinfo
->param
.copy ();
754 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
756 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
757 - eni_size_weights
.call_cost
);
758 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
759 - eni_time_weights
.call_cost
);
763 /* Dump edge summaries associated to NODE and recursively to all clones.
767 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
768 struct ipa_fn_summary
*info
)
770 struct cgraph_edge
*edge
;
771 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
773 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
774 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
778 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
779 indent
, "", callee
->name (), callee
->order
,
781 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
782 indent
, "", es
->loop_depth
, edge
->sreal_frequency ().to_double (),
783 es
->call_stmt_size
, es
->call_stmt_time
);
785 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
787 fprintf (f
, "callee size:%2i stack:%2i",
788 (int) (s
->size
/ ipa_fn_summary::size_scale
),
789 (int) s
->estimated_stack_size
);
793 fprintf (f
, " predicate: ");
794 es
->predicate
->dump (f
, info
->conds
);
798 if (es
->param
.exists ())
799 for (i
= 0; i
< (int) es
->param
.length (); i
++)
801 int prob
= es
->param
[i
].change_prob
;
804 fprintf (f
, "%*s op%i is compile time invariant\n",
806 else if (prob
!= REG_BR_PROB_BASE
)
807 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
808 prob
* 100.0 / REG_BR_PROB_BASE
);
810 if (!edge
->inline_failed
)
812 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
813 fprintf (f
, "%*sStack frame offset %i, callee self size %i,"
816 (int) s
->stack_frame_offset
,
817 (int) s
->estimated_self_stack_size
,
818 (int) s
->estimated_stack_size
);
819 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
822 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
824 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
825 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
829 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
833 fprintf (f
, "predicate: ");
834 es
->predicate
->dump (f
, info
->conds
);
843 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
845 if (node
->definition
)
847 struct ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
852 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
853 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
854 fprintf (f
, " always_inline");
856 fprintf (f
, " inlinable");
857 if (s
->fp_expressions
)
858 fprintf (f
, " fp_expression");
859 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
860 fprintf (f
, " self size: %i\n", s
->self_size
);
861 fprintf (f
, " global size: %i\n", s
->size
);
862 fprintf (f
, " min size: %i\n", s
->min_size
);
863 fprintf (f
, " self stack: %i\n",
864 (int) s
->estimated_self_stack_size
);
865 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
867 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
869 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
870 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
872 fprintf (f
, " size:%f, time:%f",
873 (double) e
->size
/ ipa_fn_summary::size_scale
,
874 e
->time
.to_double ());
875 if (e
->exec_predicate
!= true)
877 fprintf (f
, ", executed if:");
878 e
->exec_predicate
.dump (f
, s
->conds
, 0);
880 if (e
->exec_predicate
!= e
->nonconst_predicate
)
882 fprintf (f
, ", nonconst if:");
883 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
887 if (s
->loop_iterations
)
889 fprintf (f
, " loop iterations:");
890 s
->loop_iterations
->dump (f
, s
->conds
);
894 fprintf (f
, " loop stride:");
895 s
->loop_stride
->dump (f
, s
->conds
);
899 fprintf (f
, " array index:");
900 s
->array_index
->dump (f
, s
->conds
);
902 fprintf (f
, " calls:\n");
903 dump_ipa_call_summary (f
, 4, node
, s
);
907 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
912 ipa_debug_fn_summary (struct cgraph_node
*node
)
914 ipa_dump_fn_summary (stderr
, node
);
918 ipa_dump_fn_summaries (FILE *f
)
920 struct cgraph_node
*node
;
922 FOR_EACH_DEFINED_FUNCTION (node
)
923 if (!node
->global
.inlined_to
)
924 ipa_dump_fn_summary (f
, node
);
927 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
928 boolean variable pointed to by DATA. */
931 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
934 bool *b
= (bool *) data
;
939 /* If OP refers to value of function parameter, return the corresponding
940 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
941 PARM_DECL) will be stored to *SIZE_P in that case too. */
944 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
945 HOST_WIDE_INT
*size_p
)
947 /* SSA_NAME referring to parm default def? */
948 if (TREE_CODE (op
) == SSA_NAME
949 && SSA_NAME_IS_DEFAULT_DEF (op
)
950 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
953 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
954 return SSA_NAME_VAR (op
);
956 /* Non-SSA parm reference? */
957 if (TREE_CODE (op
) == PARM_DECL
)
959 bool modified
= false;
962 ao_ref_init (&refd
, op
);
963 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
964 mark_modified
, &modified
, NULL
, NULL
,
965 fbi
->aa_walk_budget
+ 1);
968 fbi
->aa_walk_budget
= 0;
974 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
981 /* If OP refers to value of function parameter, return the corresponding
982 parameter. Also traverse chains of SSA register assignments. If non-NULL,
983 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
984 stored to *SIZE_P in that case too. */
987 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
988 HOST_WIDE_INT
*size_p
)
990 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
994 if (TREE_CODE (op
) == SSA_NAME
995 && !SSA_NAME_IS_DEFAULT_DEF (op
)
996 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
997 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
998 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1003 /* If OP refers to a value of a function parameter or value loaded from an
1004 aggregate passed to a parameter (either by value or reference), return TRUE
1005 and store the number of the parameter to *INDEX_P, the access size into
1006 *SIZE_P, and information whether and how it has been loaded from an
1007 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1008 statement in which OP is used or loaded. */
1011 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1012 gimple
*stmt
, tree op
, int *index_p
,
1013 HOST_WIDE_INT
*size_p
,
1014 struct agg_position_info
*aggpos
)
1016 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1018 gcc_checking_assert (aggpos
);
1021 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1024 aggpos
->agg_contents
= false;
1025 aggpos
->by_ref
= false;
1029 if (TREE_CODE (op
) == SSA_NAME
)
1031 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1032 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1034 stmt
= SSA_NAME_DEF_STMT (op
);
1035 op
= gimple_assign_rhs1 (stmt
);
1036 if (!REFERENCE_CLASS_P (op
))
1037 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1041 aggpos
->agg_contents
= true;
1042 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1043 stmt
, op
, index_p
, &aggpos
->offset
,
1044 size_p
, &aggpos
->by_ref
);
1047 /* See if statement might disappear after inlining.
1048 0 - means not eliminated
1049 1 - half of statements goes away
1050 2 - for sure it is eliminated.
1051 We are not terribly sophisticated, basically looking for simple abstraction
1052 penalty wrappers. */
1055 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1057 enum gimple_code code
= gimple_code (stmt
);
1058 enum tree_code rhs_code
;
1068 if (gimple_num_ops (stmt
) != 2)
1071 rhs_code
= gimple_assign_rhs_code (stmt
);
1073 /* Casts of parameters, loads from parameters passed by reference
1074 and stores to return value or parameters are often free after
1075 inlining dua to SRA and further combining.
1076 Assume that half of statements goes away. */
1077 if (CONVERT_EXPR_CODE_P (rhs_code
)
1078 || rhs_code
== VIEW_CONVERT_EXPR
1079 || rhs_code
== ADDR_EXPR
1080 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1082 tree rhs
= gimple_assign_rhs1 (stmt
);
1083 tree lhs
= gimple_assign_lhs (stmt
);
1084 tree inner_rhs
= get_base_address (rhs
);
1085 tree inner_lhs
= get_base_address (lhs
);
1086 bool rhs_free
= false;
1087 bool lhs_free
= false;
1094 /* Reads of parameter are expected to be free. */
1095 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1097 /* Match expressions of form &this->field. Those will most likely
1098 combine with something upstream after inlining. */
1099 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1101 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1102 if (TREE_CODE (op
) == PARM_DECL
)
1104 else if (TREE_CODE (op
) == MEM_REF
1105 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1110 /* When parameter is not SSA register because its address is taken
1111 and it is just copied into one, the statement will be completely
1112 free after inlining (we will copy propagate backward). */
1113 if (rhs_free
&& is_gimple_reg (lhs
))
1116 /* Reads of parameters passed by reference
1117 expected to be free (i.e. optimized out after inlining). */
1118 if (TREE_CODE (inner_rhs
) == MEM_REF
1119 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1122 /* Copying parameter passed by reference into gimple register is
1123 probably also going to copy propagate, but we can't be quite
1125 if (rhs_free
&& is_gimple_reg (lhs
))
1128 /* Writes to parameters, parameters passed by value and return value
1129 (either dirrectly or passed via invisible reference) are free.
1131 TODO: We ought to handle testcase like
1132 struct a {int a,b;};
1134 retrurnsturct (void)
1140 This translate into:
1155 For that we either need to copy ipa-split logic detecting writes
1157 if (TREE_CODE (inner_lhs
) == PARM_DECL
1158 || TREE_CODE (inner_lhs
) == RESULT_DECL
1159 || (TREE_CODE (inner_lhs
) == MEM_REF
1160 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1162 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1163 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1164 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1166 0))) == RESULT_DECL
))))
1169 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1171 if (lhs_free
&& rhs_free
)
1181 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1182 predicates to the CFG edges. */
1185 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1186 struct ipa_fn_summary
*summary
,
1193 struct agg_position_info aggpos
;
1194 enum tree_code code
, inverted_code
;
1200 last
= last_stmt (bb
);
1201 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1203 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1205 op
= gimple_cond_lhs (last
);
1206 /* TODO: handle conditionals like
1209 if (unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1211 code
= gimple_cond_code (last
);
1212 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1214 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1216 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1217 ? code
: inverted_code
);
1218 /* invert_tree_comparison will return ERROR_MARK on FP
1219 comparsions that are not EQ/NE instead of returning proper
1220 unordered one. Be sure it is not confused with NON_CONSTANT. */
1221 if (this_code
!= ERROR_MARK
)
1224 = add_condition (summary
, index
, size
, &aggpos
, this_code
,
1225 unshare_expr_without_location
1226 (gimple_cond_rhs (last
)));
1227 e
->aux
= edge_predicate_pool
.allocate ();
1228 *(predicate
*) e
->aux
= p
;
1233 if (TREE_CODE (op
) != SSA_NAME
)
1236 if (builtin_constant_p (op))
1240 Here we can predicate nonconstant_code. We can't
1241 really handle constant_code since we have no predicate
1242 for this and also the constant code is not known to be
1243 optimized away when inliner doen't see operand is constant.
1244 Other optimizers might think otherwise. */
1245 if (gimple_cond_code (last
) != NE_EXPR
1246 || !integer_zerop (gimple_cond_rhs (last
)))
1248 set_stmt
= SSA_NAME_DEF_STMT (op
);
1249 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1250 || gimple_call_num_args (set_stmt
) != 1)
1252 op2
= gimple_call_arg (set_stmt
, 0);
1253 if (!unmodified_parm_or_parm_agg_item (fbi
, set_stmt
, op2
, &index
, &size
,
1256 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1258 predicate p
= add_condition (summary
, index
, size
, &aggpos
,
1259 predicate::is_not_constant
, NULL_TREE
);
1260 e
->aux
= edge_predicate_pool
.allocate ();
1261 *(predicate
*) e
->aux
= p
;
1266 /* If BB ends by a switch we can turn into predicates, attach corresponding
1267 predicates to the CFG edges. */
1270 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1271 struct ipa_fn_summary
*summary
,
1278 struct agg_position_info aggpos
;
1284 lastg
= last_stmt (bb
);
1285 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1287 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1288 op
= gimple_switch_index (last
);
1289 if (!unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1292 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1294 e
->aux
= edge_predicate_pool
.allocate ();
1295 *(predicate
*) e
->aux
= false;
1297 n
= gimple_switch_num_labels (last
);
1298 for (case_idx
= 0; case_idx
< n
; ++case_idx
)
1300 tree cl
= gimple_switch_label (last
, case_idx
);
1304 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1305 min
= CASE_LOW (cl
);
1306 max
= CASE_HIGH (cl
);
1308 /* For default we might want to construct predicate that none
1309 of cases is met, but it is bit hard to do not having negations
1310 of conditionals handy. */
1314 p
= add_condition (summary
, index
, size
, &aggpos
, EQ_EXPR
,
1315 unshare_expr_without_location (min
));
1319 p1
= add_condition (summary
, index
, size
, &aggpos
, GE_EXPR
,
1320 unshare_expr_without_location (min
));
1321 p2
= add_condition (summary
, index
, size
, &aggpos
, LE_EXPR
,
1322 unshare_expr_without_location (max
));
1325 *(struct predicate
*) e
->aux
1326 = p
.or_with (summary
->conds
, *(struct predicate
*) e
->aux
);
1331 /* For each BB in NODE attach to its AUX pointer predicate under
1332 which it is executable. */
1335 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1336 struct cgraph_node
*node
,
1337 struct ipa_fn_summary
*summary
)
1339 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1343 FOR_EACH_BB_FN (bb
, my_function
)
1345 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1346 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1349 /* Entry block is always executable. */
1350 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1351 = edge_predicate_pool
.allocate ();
1352 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1354 /* A simple dataflow propagation of predicates forward in the CFG.
1355 TODO: work in reverse postorder. */
1359 FOR_EACH_BB_FN (bb
, my_function
)
1361 predicate p
= false;
1364 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1368 predicate this_bb_predicate
1369 = *(predicate
*) e
->src
->aux
;
1371 this_bb_predicate
&= (*(struct predicate
*) e
->aux
);
1372 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1378 gcc_checking_assert (!bb
->aux
);
1384 bb
->aux
= edge_predicate_pool
.allocate ();
1385 *((predicate
*) bb
->aux
) = p
;
1387 else if (p
!= *(predicate
*) bb
->aux
)
1389 /* This OR operation is needed to ensure monotonous data flow
1390 in the case we hit the limit on number of clauses and the
1391 and/or operations above give approximate answers. */
1392 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1393 if (p
!= *(predicate
*) bb
->aux
)
1396 *((predicate
*) bb
->aux
) = p
;
1405 /* Return predicate specifying when the STMT might have result that is not
1406 a compile time constant. */
1409 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1410 struct ipa_fn_summary
*summary
,
1412 vec
<predicate
> nonconstant_names
)
1418 while (UNARY_CLASS_P (expr
))
1419 expr
= TREE_OPERAND (expr
, 0);
1421 parm
= unmodified_parm (fbi
, NULL
, expr
, &size
);
1422 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1423 return add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1425 if (is_gimple_min_invariant (expr
))
1427 if (TREE_CODE (expr
) == SSA_NAME
)
1428 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1429 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1432 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1433 TREE_OPERAND (expr
, 0),
1439 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1440 TREE_OPERAND (expr
, 1),
1442 return p1
.or_with (summary
->conds
, p2
);
1444 else if (TREE_CODE (expr
) == COND_EXPR
)
1447 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1448 TREE_OPERAND (expr
, 0),
1454 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1455 TREE_OPERAND (expr
, 1),
1459 p1
= p1
.or_with (summary
->conds
, p2
);
1460 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1461 TREE_OPERAND (expr
, 2),
1463 return p2
.or_with (summary
->conds
, p1
);
1465 else if (TREE_CODE (expr
) == CALL_EXPR
)
1476 /* Return predicate specifying when the STMT might have result that is not
1477 a compile time constant. */
1480 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1481 struct ipa_fn_summary
*summary
,
1483 vec
<predicate
> nonconstant_names
)
1488 predicate op_non_const
;
1492 struct agg_position_info aggpos
;
1494 /* What statments might be optimized away
1495 when their arguments are constant. */
1496 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1497 && gimple_code (stmt
) != GIMPLE_COND
1498 && gimple_code (stmt
) != GIMPLE_SWITCH
1499 && (gimple_code (stmt
) != GIMPLE_CALL
1500 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1503 /* Stores will stay anyway. */
1504 if (gimple_store_p (stmt
))
1507 is_load
= gimple_assign_load_p (stmt
);
1509 /* Loads can be optimized when the value is known. */
1513 gcc_assert (gimple_assign_single_p (stmt
));
1514 op
= gimple_assign_rhs1 (stmt
);
1515 if (!unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, &base_index
, &size
,
1522 /* See if we understand all operands before we start
1523 adding conditionals. */
1524 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1526 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1527 /* For arguments we can build a condition. */
1528 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1530 if (TREE_CODE (use
) != SSA_NAME
)
1532 /* If we know when operand is constant,
1533 we still can say something useful. */
1534 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1541 add_condition (summary
, base_index
, size
, &aggpos
, predicate::changed
,
1544 op_non_const
= false;
1545 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1548 tree parm
= unmodified_parm (fbi
, stmt
, use
, &size
);
1551 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1553 if (index
!= base_index
)
1554 p
= add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1560 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1561 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1563 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1564 && gimple_op (stmt
, 0)
1565 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1566 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1568 return op_non_const
;
1571 struct record_modified_bb_info
1578 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1579 probability how often it changes between USE_BB.
1580 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1581 is in different loop nest, we can do better.
1582 This is all just estimate. In theory we look for minimal cut separating
1583 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1587 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1589 struct loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1590 if (l
&& l
->header
->count
< init_bb
->count
)
1595 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1596 set except for info->stmt. */
1599 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1601 struct record_modified_bb_info
*info
=
1602 (struct record_modified_bb_info
*) data
;
1603 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1605 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
1607 bitmap_set_bit (info
->bb_set
,
1608 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1609 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1611 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1612 gimple_bb (info
->stmt
))->index
);
1615 fprintf (dump_file
, " Param ");
1616 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1617 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1618 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1620 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1621 gimple_bb (info
->stmt
))->index
);
1622 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
1627 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1628 will change since last invocation of STMT.
1630 Value 0 is reserved for compile time invariants.
1631 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1632 ought to be REG_BR_PROB_BASE / estimated_iters. */
1635 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
1637 tree op
= gimple_call_arg (stmt
, i
);
1638 basic_block bb
= gimple_bb (stmt
);
1640 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1641 op
= TREE_OPERAND (op
, 0);
1643 tree base
= get_base_address (op
);
1645 /* Global invariants never change. */
1646 if (is_gimple_min_invariant (base
))
1649 /* We would have to do non-trivial analysis to really work out what
1650 is the probability of value to change (i.e. when init statement
1651 is in a sibling loop of the call).
1653 We do an conservative estimate: when call is executed N times more often
1654 than the statement defining value, we take the frequency 1/N. */
1655 if (TREE_CODE (base
) == SSA_NAME
)
1657 profile_count init_count
;
1659 if (!bb
->count
.nonzero_p ())
1660 return REG_BR_PROB_BASE
;
1662 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1663 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1665 init_count
= get_minimal_bb
1666 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1667 gimple_bb (stmt
))->count
;
1669 if (init_count
< bb
->count
)
1670 return MAX ((init_count
.to_sreal_scale (bb
->count
)
1671 * REG_BR_PROB_BASE
).to_int (), 1);
1672 return REG_BR_PROB_BASE
;
1677 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1678 struct record_modified_bb_info info
;
1679 tree init
= ctor_for_folding (base
);
1681 if (init
!= error_mark_node
)
1683 if (!bb
->count
.nonzero_p ())
1684 return REG_BR_PROB_BASE
;
1687 fprintf (dump_file
, " Analyzing param change probablity of ");
1688 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1689 fprintf (dump_file
, "\n");
1691 ao_ref_init (&refd
, op
);
1694 info
.bb_set
= BITMAP_ALLOC (NULL
);
1696 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1697 NULL
, NULL
, fbi
->aa_walk_budget
);
1698 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
1703 fprintf (dump_file
, " Ran out of AA walking budget.\n");
1705 fprintf (dump_file
, " Set in same BB as used.\n");
1707 BITMAP_FREE (info
.bb_set
);
1708 return REG_BR_PROB_BASE
;
1713 /* Lookup the most frequent update of the value and believe that
1714 it dominates all the other; precise analysis here is difficult. */
1715 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
1716 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
1719 fprintf (dump_file
, " Set with count ");
1720 max
.dump (dump_file
);
1721 fprintf (dump_file
, " and used with count ");
1722 bb
->count
.dump (dump_file
);
1723 fprintf (dump_file
, " freq %f\n",
1724 max
.to_sreal_scale (bb
->count
).to_double ());
1727 BITMAP_FREE (info
.bb_set
);
1728 if (max
< bb
->count
)
1729 return MAX ((max
.to_sreal_scale (bb
->count
)
1730 * REG_BR_PROB_BASE
).to_int (), 1);
1731 return REG_BR_PROB_BASE
;
1735 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1736 sub-graph and if the predicate the condition depends on is known. If so,
1737 return true and store the pointer the predicate in *P. */
1740 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
1741 ipa_fn_summary
*summary
, basic_block bb
,
1743 vec
<predicate
> nonconstant_names
)
1747 basic_block first_bb
= NULL
;
1750 if (single_pred_p (bb
))
1756 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1758 if (single_succ_p (e
->src
))
1760 if (!single_pred_p (e
->src
))
1763 first_bb
= single_pred (e
->src
);
1764 else if (single_pred (e
->src
) != first_bb
)
1771 else if (e
->src
!= first_bb
)
1779 stmt
= last_stmt (first_bb
);
1781 || gimple_code (stmt
) != GIMPLE_COND
1782 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
1785 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1786 gimple_cond_lhs (stmt
),
1794 /* Given a PHI statement in a function described by inline properties SUMMARY
1795 and *P being the predicate describing whether the selected PHI argument is
1796 known, store a predicate for the result of the PHI statement into
1797 NONCONSTANT_NAMES, if possible. */
1800 predicate_for_phi_result (struct ipa_fn_summary
*summary
, gphi
*phi
,
1802 vec
<predicate
> nonconstant_names
)
1806 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
1808 tree arg
= gimple_phi_arg (phi
, i
)->def
;
1809 if (!is_gimple_min_invariant (arg
))
1811 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
1812 *p
= p
->or_with (summary
->conds
,
1813 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
1819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1821 fprintf (dump_file
, "\t\tphi predicate: ");
1822 p
->dump (dump_file
, summary
->conds
);
1824 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
1827 /* Return predicate specifying when array index in access OP becomes non-constant. */
1830 array_index_predicate (ipa_fn_summary
*info
,
1831 vec
< predicate
> nonconstant_names
, tree op
)
1833 predicate p
= false;
1834 while (handled_component_p (op
))
1836 if (TREE_CODE (op
) == ARRAY_REF
|| TREE_CODE (op
) == ARRAY_RANGE_REF
)
1838 if (TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
1839 p
= p
.or_with (info
->conds
,
1840 nonconstant_names
[SSA_NAME_VERSION
1841 (TREE_OPERAND (op
, 1))]);
1843 op
= TREE_OPERAND (op
, 0);
1848 /* For a typical usage of __builtin_expect (a<b, 1), we
1849 may introduce an extra relation stmt:
1850 With the builtin, we have
1853 t3 = __builtin_expect (t2, 1);
1856 Without the builtin, we have
1859 This affects the size/time estimation and may have
1860 an impact on the earlier inlining.
1861 Here find this pattern and fix it up later. */
1864 find_foldable_builtin_expect (basic_block bb
)
1866 gimple_stmt_iterator bsi
;
1868 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1870 gimple
*stmt
= gsi_stmt (bsi
);
1871 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
1872 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
1873 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
1875 tree var
= gimple_call_lhs (stmt
);
1876 tree arg
= gimple_call_arg (stmt
, 0);
1877 use_operand_p use_p
;
1884 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
1886 while (TREE_CODE (arg
) == SSA_NAME
)
1888 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
1889 if (!is_gimple_assign (stmt_tmp
))
1891 switch (gimple_assign_rhs_code (stmt_tmp
))
1910 arg
= gimple_assign_rhs1 (stmt_tmp
);
1913 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
1914 && gimple_code (use_stmt
) == GIMPLE_COND
)
1921 /* Return true when the basic blocks contains only clobbers followed by RESX.
1922 Such BBs are kept around to make removal of dead stores possible with
1923 presence of EH and will be optimized out by optimize_clobbers later in the
1926 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1927 that can be clobber only, too.. When it is false, the RESX is not necessary
1928 on the end of basic block. */
1931 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
1933 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1939 if (gsi_end_p (gsi
))
1941 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
1945 else if (!single_succ_p (bb
))
1948 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
1950 gimple
*stmt
= gsi_stmt (gsi
);
1951 if (is_gimple_debug (stmt
))
1953 if (gimple_clobber_p (stmt
))
1955 if (gimple_code (stmt
) == GIMPLE_LABEL
)
1960 /* See if all predecestors are either throws or clobber only BBs. */
1961 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1962 if (!(e
->flags
& EDGE_EH
)
1963 && !clobber_only_eh_bb_p (e
->src
, false))
1969 /* Return true if STMT compute a floating point expression that may be affected
1970 by -ffast-math and similar flags. */
1973 fp_expression_p (gimple
*stmt
)
1978 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
1979 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
1984 /* Analyze function body for NODE.
1985 EARLY indicates run from early optimization pipeline. */
1988 analyze_function_body (struct cgraph_node
*node
, bool early
)
1990 sreal time
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
);
1991 /* Estimate static overhead for function prologue/epilogue and alignment. */
1992 int size
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
);
1993 /* Benefits are scaled by probability of elimination that is in range
1996 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1998 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
1999 predicate bb_predicate
;
2000 struct ipa_func_body_info fbi
;
2001 vec
<predicate
> nonconstant_names
= vNULL
;
2004 predicate array_index
= true;
2005 gimple
*fix_builtin_expect_stmt
;
2007 gcc_assert (my_function
&& my_function
->cfg
);
2008 gcc_assert (cfun
== my_function
);
2010 memset(&fbi
, 0, sizeof(fbi
));
2011 vec_free (info
->conds
);
2013 vec_free (info
->size_time_table
);
2014 info
->size_time_table
= NULL
;
2016 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2017 so we can produce proper inline hints.
2019 When optimizing and analyzing for early inliner, initialize node params
2020 so we can produce correct BB predicates. */
2022 if (opt_for_fn (node
->decl
, optimize
))
2024 calculate_dominance_info (CDI_DOMINATORS
);
2026 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2029 ipa_check_create_node_params ();
2030 ipa_initialize_node_params (node
);
2033 if (ipa_node_params_sum
)
2036 fbi
.info
= IPA_NODE_REF (node
);
2037 fbi
.bb_infos
= vNULL
;
2038 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2039 fbi
.param_count
= count_formal_params (node
->decl
);
2040 fbi
.aa_walk_budget
= PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS
);
2042 nonconstant_names
.safe_grow_cleared
2043 (SSANAMES (my_function
)->length ());
2048 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2051 /* When we run into maximal number of entries, we assign everything to the
2052 constant truth case. Be sure to have it in list. */
2053 bb_predicate
= true;
2054 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2056 bb_predicate
= predicate::not_inlined ();
2057 info
->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
)
2058 * ipa_fn_summary::size_scale
,
2059 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
),
2064 compute_bb_predicates (&fbi
, node
, info
);
2065 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2066 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2067 for (n
= 0; n
< nblocks
; n
++)
2069 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2070 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2071 if (clobber_only_eh_bb_p (bb
))
2073 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2074 fprintf (dump_file
, "\n Ignoring BB %i;"
2075 " it will be optimized away by cleanup_clobbers\n",
2080 /* TODO: Obviously predicates can be propagated down across CFG. */
2084 bb_predicate
= *(predicate
*) bb
->aux
;
2086 bb_predicate
= false;
2089 bb_predicate
= true;
2091 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2093 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2094 bb_predicate
.dump (dump_file
, info
->conds
);
2097 if (fbi
.info
&& nonconstant_names
.exists ())
2099 predicate phi_predicate
;
2100 bool first_phi
= true;
2102 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2106 && !phi_result_unknown_predicate (&fbi
, info
, bb
,
2111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2113 fprintf (dump_file
, " ");
2114 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2116 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2121 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2123 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
2126 gimple
*stmt
= gsi_stmt (bsi
);
2127 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2128 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2130 predicate will_be_nonconstant
;
2132 /* This relation stmt should be folded after we remove
2133 buildin_expect call. Adjust the cost here. */
2134 if (stmt
== fix_builtin_expect_stmt
)
2140 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2142 fprintf (dump_file
, " ");
2143 print_gimple_stmt (dump_file
, stmt
, 0);
2144 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2145 freq
.to_double (), this_size
,
2149 if (gimple_assign_load_p (stmt
) && nonconstant_names
.exists ())
2151 predicate this_array_index
;
2153 array_index_predicate (info
, nonconstant_names
,
2154 gimple_assign_rhs1 (stmt
));
2155 if (this_array_index
!= false)
2156 array_index
&= this_array_index
;
2158 if (gimple_store_p (stmt
) && nonconstant_names
.exists ())
2160 predicate this_array_index
;
2162 array_index_predicate (info
, nonconstant_names
,
2163 gimple_get_lhs (stmt
));
2164 if (this_array_index
!= false)
2165 array_index
&= this_array_index
;
2169 if (is_gimple_call (stmt
)
2170 && !gimple_call_internal_p (stmt
))
2172 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2173 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2175 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2176 resolved as constant. We however don't want to optimize
2177 out the cgraph edges. */
2178 if (nonconstant_names
.exists ()
2179 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2180 && gimple_call_lhs (stmt
)
2181 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2183 predicate false_p
= false;
2184 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2187 if (ipa_node_params_sum
)
2189 int count
= gimple_call_num_args (stmt
);
2193 es
->param
.safe_grow_cleared (count
);
2194 for (i
= 0; i
< count
; i
++)
2196 int prob
= param_change_prob (&fbi
, stmt
, i
);
2197 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2198 es
->param
[i
].change_prob
= prob
;
2202 es
->call_stmt_size
= this_size
;
2203 es
->call_stmt_time
= this_time
;
2204 es
->loop_depth
= bb_loop_depth (bb
);
2205 edge_set_predicate (edge
, &bb_predicate
);
2206 if (edge
->speculative
)
2208 cgraph_edge
*direct
, *indirect
;
2210 edge
->speculative_call_info (direct
, indirect
, ref
);
2211 gcc_assert (direct
== edge
);
2212 ipa_call_summary
*es2
2213 = ipa_call_summaries
->get_create (indirect
);
2214 ipa_call_summaries
->duplicate (edge
, indirect
,
2219 /* TODO: When conditional jump or swithc is known to be constant, but
2220 we did not translate it into the predicates, we really can account
2221 just maximum of the possible paths. */
2224 = will_be_nonconstant_predicate (&fbi
, info
,
2225 stmt
, nonconstant_names
);
2227 will_be_nonconstant
= true;
2228 if (this_time
|| this_size
)
2230 sreal final_time
= (sreal
)this_time
* freq
;
2232 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2233 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2235 "\t\t50%% will be eliminated by inlining\n");
2236 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2237 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2239 struct predicate p
= bb_predicate
& will_be_nonconstant
;
2241 /* We can ignore statement when we proved it is never going
2242 to happen, but we cannot do that for call statements
2243 because edges are accounted specially. */
2245 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2251 /* We account everything but the calls. Calls have their own
2252 size/time info attached to cgraph edges. This is necessary
2253 in order to make the cost disappear after inlining. */
2254 if (!is_gimple_call (stmt
))
2258 predicate ip
= bb_predicate
& predicate::not_inlined ();
2259 info
->account_size_time (this_size
* prob
,
2260 (final_time
* prob
) / 2, ip
,
2264 info
->account_size_time (this_size
* (2 - prob
),
2265 (final_time
* (2 - prob
) / 2),
2270 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2272 info
->fp_expressions
= true;
2274 fprintf (dump_file
, " fp_expression set\n");
2277 gcc_assert (time
>= 0);
2278 gcc_assert (size
>= 0);
2282 set_hint_predicate (&ipa_fn_summaries
->get_create (node
)->array_index
,
2286 if (nonconstant_names
.exists () && !early
)
2289 predicate loop_iterations
= true;
2290 predicate loop_stride
= true;
2292 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2293 flow_loops_dump (dump_file
, NULL
, 0);
2295 FOR_EACH_LOOP (loop
, 0)
2300 struct tree_niter_desc niter_desc
;
2301 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2303 exits
= get_loop_exit_edges (loop
);
2304 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2305 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2306 && !is_gimple_min_invariant (niter_desc
.niter
))
2308 predicate will_be_nonconstant
2309 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2312 if (will_be_nonconstant
!= true)
2313 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2314 if (will_be_nonconstant
!= true
2315 && will_be_nonconstant
!= false)
2316 /* This is slightly inprecise. We may want to represent each
2317 loop with independent predicate. */
2318 loop_iterations
&= will_be_nonconstant
;
2323 /* To avoid quadratic behavior we analyze stride predicates only
2324 with respect to the containing loop. Thus we simply iterate
2325 over all defs in the outermost loop body. */
2326 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2327 loop
!= NULL
; loop
= loop
->next
)
2329 basic_block
*body
= get_loop_body (loop
);
2330 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2332 gimple_stmt_iterator gsi
;
2333 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2334 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2337 gimple
*stmt
= gsi_stmt (gsi
);
2339 if (!is_gimple_assign (stmt
))
2342 tree def
= gimple_assign_lhs (stmt
);
2343 if (TREE_CODE (def
) != SSA_NAME
)
2347 if (!simple_iv (loop_containing_stmt (stmt
),
2348 loop_containing_stmt (stmt
),
2350 || is_gimple_min_invariant (iv
.step
))
2353 predicate will_be_nonconstant
2354 = will_be_nonconstant_expr_predicate (&fbi
, info
, iv
.step
,
2356 if (will_be_nonconstant
!= true)
2357 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2358 if (will_be_nonconstant
!= true
2359 && will_be_nonconstant
!= false)
2360 /* This is slightly inprecise. We may want to represent
2361 each loop with independent predicate. */
2362 loop_stride
= loop_stride
& will_be_nonconstant
;
2367 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2368 set_hint_predicate (&s
->loop_iterations
, loop_iterations
);
2369 set_hint_predicate (&s
->loop_stride
, loop_stride
);
2372 FOR_ALL_BB_FN (bb
, my_function
)
2378 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2380 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2383 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2387 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2389 s
->self_size
= size
;
2390 nonconstant_names
.release ();
2391 ipa_release_body_info (&fbi
);
2392 if (opt_for_fn (node
->decl
, optimize
))
2395 loop_optimizer_finalize ();
2396 else if (!ipa_edge_args_sum
)
2397 ipa_free_all_node_params ();
2398 free_dominance_info (CDI_DOMINATORS
);
2402 fprintf (dump_file
, "\n");
2403 ipa_dump_fn_summary (dump_file
, node
);
2408 /* Compute function summary.
2409 EARLY is true when we compute parameters during early opts. */
2412 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2414 HOST_WIDE_INT self_stack_size
;
2415 struct cgraph_edge
*e
;
2416 struct ipa_fn_summary
*info
;
2418 gcc_assert (!node
->global
.inlined_to
);
2420 if (!ipa_fn_summaries
)
2421 ipa_fn_summary_alloc ();
2423 /* Create a new ipa_fn_summary. */
2424 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
2425 ipa_fn_summaries
->remove (node
);
2426 info
= ipa_fn_summaries
->get_create (node
);
2428 /* Estimate the stack size for the function if we're optimizing. */
2429 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2430 ? estimated_stack_frame_size (node
) : 0;
2431 info
->estimated_self_stack_size
= self_stack_size
;
2432 info
->estimated_stack_size
= self_stack_size
;
2433 info
->stack_frame_offset
= 0;
2435 if (node
->thunk
.thunk_p
)
2437 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
2440 node
->local
.can_change_signature
= false;
2441 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2442 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2443 info
->account_size_time (ipa_fn_summary::size_scale
2445 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS
),
2447 (PARAM_UNINLINED_FUNCTION_THUNK_TIME
), t
, t
);
2448 t
= predicate::not_inlined ();
2449 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2450 ipa_update_overall_fn_summary (node
);
2451 info
->self_size
= info
->size
;
2452 if (stdarg_p (TREE_TYPE (node
->decl
)))
2454 info
->inlinable
= false;
2455 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2458 info
->inlinable
= true;
2462 /* Even is_gimple_min_invariant rely on current_function_decl. */
2463 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2465 /* Can this function be inlined at all? */
2466 if (!opt_for_fn (node
->decl
, optimize
)
2467 && !lookup_attribute ("always_inline",
2468 DECL_ATTRIBUTES (node
->decl
)))
2469 info
->inlinable
= false;
2471 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2473 /* Type attributes can use parameter indices to describe them. */
2474 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2475 /* Likewise for #pragma omp declare simd functions or functions
2476 with simd attribute. */
2477 || lookup_attribute ("omp declare simd",
2478 DECL_ATTRIBUTES (node
->decl
)))
2479 node
->local
.can_change_signature
= false;
2482 /* Otherwise, inlinable functions always can change signature. */
2483 if (info
->inlinable
)
2484 node
->local
.can_change_signature
= true;
2487 /* Functions calling builtin_apply cannot change signature. */
2488 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2490 tree
cdecl = e
->callee
->decl
;
2491 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
2492 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
2495 node
->local
.can_change_signature
= !e
;
2498 analyze_function_body (node
, early
);
2501 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2502 if (e
->callee
->comdat_local_p ())
2504 node
->calls_comdat_local
= (e
!= NULL
);
2506 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2507 info
->size
= info
->self_size
;
2508 info
->stack_frame_offset
= 0;
2509 info
->estimated_stack_size
= info
->estimated_self_stack_size
;
2511 /* Code above should compute exactly the same result as
2512 ipa_update_overall_fn_summary but because computation happens in
2513 different order the roundoff errors result in slight changes. */
2514 ipa_update_overall_fn_summary (node
);
2515 /* In LTO mode we may have speculative edges set. */
2516 gcc_assert (in_lto_p
|| info
->size
== info
->self_size
);
2520 /* Compute parameters of functions used by inliner using
2521 current_function_decl. */
/* Thin wrapper: look up the cgraph node of the function currently being
   compiled and run compute_fn_summary on it; the second argument (true)
   is presumably the EARLY flag — confirm against compute_fn_summary's
   signature, which is not visible in this chunk.  */
2524 compute_fn_summary_for_current (void)
2526 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2530 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2531 KNOWN_CONTEXTS and KNOWN_AGGS. */
/* NOTE(review): several original lines (early returns, TARGET/SPECULATIVE
   declarations) are elided from this chunk; comments below describe only
   the visible statements.  */
2534 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2535 int *size
, int *time
,
2536 vec
<tree
> known_vals
,
2537 vec
<ipa_polymorphic_call_context
> known_contexts
,
2538 vec
<ipa_agg_jump_function_p
> known_aggs
)
2541 struct cgraph_node
*callee
;
2542 struct ipa_fn_summary
*isummary
;
2543 enum availability avail
;
/* Without any known values or contexts there is nothing to devirtualize;
   likewise bail out when indirect inlining is disabled for the caller.  */
2546 if (!known_vals
.exists () && !known_contexts
.exists ())
2548 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
/* Try to resolve the indirect edge to a concrete call target; speculative
   resolutions are not counted as a benefit here.  */
2551 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2552 known_aggs
, &speculative
);
2553 if (!target
|| speculative
)
2556 /* Account for difference in cost between indirect and direct calls. */
2557 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2558 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2559 gcc_checking_assert (*time
>= 0);
2560 gcc_checking_assert (*size
>= 0);
/* Only report an inlining benefit when the resolved callee has a body,
   is available, and its function summary marks it inlinable.  */
2562 callee
= cgraph_node::get (target
);
2563 if (!callee
|| !callee
->definition
)
2565 callee
= callee
->function_symbol (&avail
);
2566 if (avail
< AVAIL_AVAILABLE
)
2568 isummary
= ipa_fn_summaries
->get (callee
);
2569 if (isummary
== NULL
)
2572 return isummary
->inlinable
;
2575 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2576 handle edge E with probability PROB.
2577 Set HINTS if edge may be devirtualized.
2578 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
/* NOTE(review): parts of the parameter list (TIME, PROB, HINTS) and some
   statements are elided from this chunk.  */
2582 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2585 vec
<tree
> known_vals
,
2586 vec
<ipa_polymorphic_call_context
> known_contexts
,
2587 vec
<ipa_agg_jump_function_p
> known_aggs
,
/* Start from the call statement's own size/time as recorded in the call
   summary; estimate_edge_devirt_benefit may reduce them in place.  */
2590 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2591 int call_size
= es
->call_stmt_size
;
2592 int call_time
= es
->call_stmt_time
;
/* If the edge looks devirtualizable and is hot, record the hint.  */
2595 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2596 known_vals
, known_contexts
, known_aggs
)
2597 && hints
&& e
->maybe_hot_p ())
2598 *hints
|= INLINE_HINT_indirect_call
;
2599 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2602 *min_size
+= cur_size
;
/* Time is scaled by edge frequency; skip the probability multiply when
   PROB is certain (REG_BR_PROB_BASE).  */
2603 if (prob
== REG_BR_PROB_BASE
)
2604 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2606 *time
+= ((sreal
)call_time
* prob
) * e
->sreal_frequency ();
2611 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2612 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2613 describe context of the call site. */
2616 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2617 int *min_size
, sreal
*time
,
2619 clause_t possible_truths
,
2620 vec
<tree
> known_vals
,
2621 vec
<ipa_polymorphic_call_context
> known_contexts
,
2622 vec
<ipa_agg_jump_function_p
> known_aggs
)
2624 struct cgraph_edge
*e
;
/* Walk direct callees; inlined callees are recursed into instead of being
   costed as calls (see the recursive call below).  */
2625 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2627 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2629 /* Do not care about zero sized builtins. */
2630 if (e
->inline_failed
&& !es
->call_stmt_size
)
2632 gcc_checking_assert (!es
->call_stmt_time
)
;
/* Only count the edge when its predicate (if any) can still be true
   under POSSIBLE_TRUTHS.  */
2636 || es
->predicate
->evaluate (possible_truths
))
2638 if (e
->inline_failed
)
2640 /* Predicates of calls shall not use NOT_CHANGED codes,
2641 so we do not need to compute probabilities. */
2642 estimate_edge_size_and_time (e
, size
,
2643 es
->predicate
? NULL
: min_size
,
2644 time
, REG_BR_PROB_BASE
,
2645 known_vals
, known_contexts
,
/* Inlined edge: account for the callee's own calls recursively.  */
2649 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2652 known_vals
, known_contexts
,
/* Indirect calls are costed the same way as non-inlined direct calls.  */
2656 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2658 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2660 || es
->predicate
->evaluate (possible_truths
))
2661 estimate_edge_size_and_time (e
, size
,
2662 es
->predicate
? NULL
: min_size
,
2663 time
, REG_BR_PROB_BASE
,
2664 known_vals
, known_contexts
, known_aggs
,
2670 /* Estimate size and time needed to execute NODE assuming
2671 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2672 information about NODE's arguments. If non-NULL use also probability
2673 information present in INLINE_PARAM_SUMMARY vector.
2674 Additionally determine hints determined by the context. Finally compute
2675 minimal size needed for the call that is independent of the call context and
2676 can be used for fast estimates. Return the values in RET_SIZE,
2677 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2680 estimate_node_size_and_time (struct cgraph_node
*node
,
2681 clause_t possible_truths
,
2682 clause_t nonspec_possible_truths
,
2683 vec
<tree
> known_vals
,
2684 vec
<ipa_polymorphic_call_context
> known_contexts
,
2685 vec
<ipa_agg_jump_function_p
> known_aggs
,
2686 int *ret_size
, int *ret_min_size
,
2688 sreal
*ret_nonspecialized_time
,
2689 ipa_hints
*ret_hints
,
2690 vec
<inline_param_summary
>
2691 inline_param_summary
)
2693 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2698 ipa_hints hints
= 0;
/* Dump which conditions are known false in this context.  */
2701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2704 fprintf (dump_file
, " Estimating body: %s/%i\n"
2705 " Known to be false: ", node
->name (),
2708 for (i
= predicate::not_inlined_condition
;
2709 i
< (predicate::first_dynamic_condition
2710 + (int) vec_safe_length (info
->conds
)); i
++)
2711 if (!(possible_truths
& (1 << i
)))
2714 fprintf (dump_file
, ", ");
2716 dump_condition (dump_file
, info
->conds
, i
);
/* Cost of the calls is computed first; body entries are added below.  */
2720 estimate_calls_size_and_time (node
, &size
, &min_size
, &time
, &hints
, possible_truths
,
2721 known_vals
, known_contexts
, known_aggs
);
2722 sreal nonspecialized_time
= time
;
/* Accumulate the size/time table entries whose predicates hold.
   EXEC uses the nonspecialized clause, NONCONST the specialized one.  */
2724 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
2726 bool exec
= e
->exec_predicate
.evaluate (nonspec_possible_truths
);
2728 /* Because predicates are conservative, it can happen that nonconst is 1
2732 bool nonconst
= e
->nonconst_predicate
.evaluate (possible_truths
);
2734 gcc_checking_assert (e
->time
>= 0);
2735 gcc_checking_assert (time
>= 0);
2737 /* We compute specialized size only because size of nonspecialized
2738 copy is context independent.
2740 The difference between nonspecialized execution and specialized is
2741 that nonspecialized is not going to have optimized out computations
2742 known to be constant in a specialized setting. */
2745 nonspecialized_time
+= e
->time
;
2748 else if (!inline_param_summary
.exists ())
/* With per-parameter summaries available, scale the entry's time by
   the probability that its nonconst predicate actually fires.  */
2755 int prob
= e
->nonconst_predicate
.probability
2756 (info
->conds
, possible_truths
,
2757 inline_param_summary
);
2758 gcc_checking_assert (prob
>= 0);
2759 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
2760 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
2762 gcc_checking_assert (time
>= 0);
/* Entry 0 of the size/time table is unconditional (predicate true);
   its size is the context-independent minimum.  */
2765 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
2766 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
2767 min_size
= (*info
->size_time_table
)[0].size
;
2768 gcc_checking_assert (size
>= 0);
2769 gcc_checking_assert (time
>= 0);
2770 /* nonspecialized_time should be always bigger than specialized time.
2771 Roundoff issues however may get into the way. */
2772 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
2774 /* Roundoff issues may make specialized time bigger than nonspecialized
2775 time. We do not really want that to happen because some heuristics
2776 may get confused by seeing negative speedups. */
2777 if (time
> nonspecialized_time
)
2778 time
= nonspecialized_time
;
/* Hints: loop bounds/strides/array indices that become known in this
   context, plus static properties of NODE.  */
2780 if (info
->loop_iterations
2781 && !info
->loop_iterations
->evaluate (possible_truths
))
2782 hints
|= INLINE_HINT_loop_iterations
;
2783 if (info
->loop_stride
2784 && !info
->loop_stride
->evaluate (possible_truths
))
2785 hints
|= INLINE_HINT_loop_stride
;
2786 if (info
->array_index
2787 && !info
->array_index
->evaluate (possible_truths
))
2788 hints
|= INLINE_HINT_array_index
;
2790 hints
|= INLINE_HINT_in_scc
;
2791 if (DECL_DECLARED_INLINE_P (node
->decl
))
2792 hints
|= INLINE_HINT_declared_inline
;
/* Sizes are accumulated scaled; round back to instruction units.  */
2794 size
= RDIV (size
, ipa_fn_summary::size_scale
);
2795 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
2797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2798 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
2799 time
.to_double (), nonspecialized_time
.to_double ());
2802 if (ret_nonspecialized_time
)
2803 *ret_nonspecialized_time
= nonspecialized_time
;
2807 *ret_min_size
= min_size
;
2814 /* Estimate size and time needed to execute callee of EDGE assuming that
2815 parameters known to be constant at caller of EDGE are propagated.
2816 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2817 and types for parameters. */
/* Evaluate NODE's conditions against the known arguments, then delegate to
   estimate_node_size_and_time with the resulting clauses.  No
   per-parameter probability summary is passed (vNULL).  */
2820 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
2821 vec
<tree
> known_vals
,
2822 vec
<ipa_polymorphic_call_context
>
2824 vec
<ipa_agg_jump_function_p
> known_aggs
,
2825 int *ret_size
, sreal
*ret_time
,
2826 sreal
*ret_nonspec_time
,
2829 clause_t clause
, nonspec_clause
;
2831 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
2832 &clause
, &nonspec_clause
);
2833 estimate_node_size_and_time (node
, clause
, nonspec_clause
,
2834 known_vals
, known_contexts
,
2835 known_aggs
, ret_size
, NULL
, ret_time
,
2836 ret_nonspec_time
, hints
, vNULL
);
2840 /* Update summary information of inline clones after inlining.
2841 Compute peak stack usage. */
2844 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
2846 struct cgraph_edge
*e
;
2847 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (node
);
/* NODE was just inlined, so its (sole relevant) caller is
   node->callers->caller.  */
2848 ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (node
->callers
->caller
);
/* The inlined body's frame sits on top of the caller's frame.  */
2851 callee_info
->stack_frame_offset
2852 = caller_info
->stack_frame_offset
2853 + caller_info
->estimated_self_stack_size
;
2854 peak
= callee_info
->stack_frame_offset
2855 + callee_info
->estimated_self_stack_size
;
/* Propagate the peak stack size up to the function everything was
   ultimately inlined into.  */
2857 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
->global
.inlined_to
);
2858 if (s
->estimated_stack_size
< peak
)
2859 s
->estimated_stack_size
= peak
;
2860 ipa_propagate_frequency (node
);
/* Recurse into already-inlined callees and shift loop depths of all
   outgoing edges by DEPTH (the call site's loop depth).  */
2861 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2863 if (!e
->inline_failed
)
2864 inline_update_callee_summaries (e
->callee
, depth
);
2865 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2867 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2868 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2871 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2872 When function A is inlined in B and A calls C with parameter that
2873 changes with probability PROB1 and C is known to be passthrough
2874 of argument of B that changes with probability PROB2, the probability
2875 of change is now PROB1*PROB2. */
2878 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
2879 struct cgraph_edge
*edge
)
/* Jump functions exist only when IPA parameter analysis ran.  */
2881 if (ipa_node_params_sum
)
2884 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2885 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2886 struct ipa_call_summary
*inlined_es
2887 = ipa_call_summaries
->get (inlined_edge
);
2889 if (es
->param
.length () == 0)
2892 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
2894 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
/* Only pass-through and ancestor jump functions forward a formal
   parameter whose change probability can be combined.  */
2895 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2896 || jfunc
->type
== IPA_JF_ANCESTOR
)
2898 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
2899 ? ipa_get_jf_pass_through_formal_id (jfunc
)
2900 : ipa_get_jf_ancestor_formal_id (jfunc
);
2901 if (id
< (int) inlined_es
->param
.length ())
2903 int prob1
= es
->param
[i
].change_prob
;
2904 int prob2
= inlined_es
->param
[id
].change_prob
;
2905 int prob
= combine_probabilities (prob1
, prob2
);
/* Rounding can produce 0 from two nonzero probabilities; the
   handling of that case is elided from this chunk.  */
2907 if (prob1
&& prob2
&& !prob
)
2910 es
->param
[i
].change_prob
= prob
;
2917 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2919 Remap predicates of callees of NODE. Rest of arguments match
2922 Also update change probabilities. */
2925 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
2926 struct cgraph_node
*node
,
2927 struct ipa_fn_summary
*info
,
2928 struct ipa_fn_summary
*callee_info
,
2929 vec
<int> operand_map
,
2930 vec
<int> offset_map
,
2931 clause_t possible_truths
,
2932 predicate
*toplev_predicate
)
2934 struct cgraph_edge
*e
, *next
;
/* NEXT is cached because edge_set_predicate may remove E from the list
   (presumably — confirm against edge_set_predicate, not visible here).  */
2935 for (e
= node
->callees
; e
; e
= next
)
2937 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2939 next
= e
->next_callee
;
2941 if (e
->inline_failed
)
2943 remap_edge_change_prob (inlined_edge
, e
);
/* Translate the edge predicate into the caller's parameter space.  */
2947 p
= es
->predicate
->remap_after_inlining
2948 (info
, callee_info
, operand_map
,
2949 offset_map
, possible_truths
,
2951 edge_set_predicate (e
, &p
);
2954 edge_set_predicate (e
, toplev_predicate
);
/* Inlined callee: recurse to remap its own outgoing edges too.  */
2957 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
2958 operand_map
, offset_map
, possible_truths
,
/* Same treatment for indirect edges; these are never inlined.  */
2961 for (e
= node
->indirect_calls
; e
; e
= next
)
2963 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2965 next
= e
->next_callee
;
2967 remap_edge_change_prob (inlined_edge
, e
);
2970 p
= es
->predicate
->remap_after_inlining
2971 (info
, callee_info
, operand_map
, offset_map
,
2972 possible_truths
, *toplev_predicate
);
2973 edge_set_predicate (e
, &p
);
2976 edge_set_predicate (e
, toplev_predicate
);
2980 /* Same as remap_predicate, but set result into hint *HINT. */
2983 remap_hint_predicate (struct ipa_fn_summary
*info
,
2984 struct ipa_fn_summary
*callee_info
,
2986 vec
<int> operand_map
,
2987 vec
<int> offset_map
,
2988 clause_t possible_truths
,
2989 predicate
*toplev_predicate
)
/* Remap the hint's predicate into the caller's context.  */
2995 p
= (*hint
)->remap_after_inlining
2997 operand_map
, offset_map
,
2998 possible_truths
, *toplev_predicate
);
/* Only keep the hint when the remapped predicate is nontrivial
   (neither constant false nor constant true).  */
2999 if (p
!= false && p
!= true)
3002 set_hint_predicate (hint
, p
);
3008 /* We inlined EDGE. Update summary of the function we inlined into. */
3011 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
3013 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
/* TO is the function the callee body ultimately lands in (the root of
   the inline clone tree).  */
3014 struct cgraph_node
*to
= (edge
->caller
->global
.inlined_to
3015 ? edge
->caller
->global
.inlined_to
: edge
->caller
);
3016 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
3017 clause_t clause
= 0; /* not_inline is known to be false. */
3019 vec
<int> operand_map
= vNULL
;
3020 vec
<int> offset_map
= vNULL
;
3022 predicate toplev_predicate
;
3023 predicate true_p
= true;
3024 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
/* The edge's own predicate (if any) guards everything merged in.  */
3027 toplev_predicate
= *es
->predicate
;
3029 toplev_predicate
= true;
3031 info
->fp_expressions
|= callee_info
->fp_expressions
;
3033 if (callee_info
->conds
)
3034 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
/* Build maps translating callee parameter indices (and aggregate
   offsets) to the caller's, using the edge's jump functions.  */
3035 if (ipa_node_params_sum
&& callee_info
->conds
)
3037 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3038 int count
= ipa_get_cs_argument_count (args
);
3043 operand_map
.safe_grow_cleared (count
);
3044 offset_map
.safe_grow_cleared (count
);
3046 for (i
= 0; i
< count
; i
++)
3048 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3051 /* TODO: handle non-NOPs when merging. */
3052 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3054 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3055 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3056 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3059 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3061 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3062 if (offset
>= 0 && offset
< INT_MAX
)
3064 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3065 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3067 offset_map
[i
] = offset
;
3070 operand_map
[i
] = map
;
3071 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
/* Fold each callee size/time entry into TO's table, with both of its
   predicates remapped into TO's parameter space.  */
3074 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3077 p
= e
->exec_predicate
.remap_after_inlining
3078 (info
, callee_info
, operand_map
,
3081 predicate nonconstp
;
3082 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3083 (info
, callee_info
, operand_map
,
3086 if (p
!= false && nonconstp
!= false)
/* Scale the entry's time by how often the call site executes and
   by the probability the nonconst predicate fires.  */
3088 sreal add_time
= ((sreal
)e
->time
* edge
->sreal_frequency ());
3089 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3091 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3092 if (prob
!= REG_BR_PROB_BASE
3093 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3095 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3096 (double) prob
/ REG_BR_PROB_BASE
);
3098 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
/* Remap predicates on the callee's outgoing edges and hints so they
   now refer to TO's parameters.  */
3101 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3102 offset_map
, clause
, &toplev_predicate
);
3103 remap_hint_predicate (info
, callee_info
,
3104 &callee_info
->loop_iterations
,
3105 operand_map
, offset_map
, clause
, &toplev_predicate
);
3106 remap_hint_predicate (info
, callee_info
,
3107 &callee_info
->loop_stride
,
3108 operand_map
, offset_map
, clause
, &toplev_predicate
);
3109 remap_hint_predicate (info
, callee_info
,
3110 &callee_info
->array_index
,
3111 operand_map
, offset_map
, clause
, &toplev_predicate
);
3113 ipa_call_summary
*s
= ipa_call_summaries
->get (edge
);
3114 inline_update_callee_summaries (edge
->callee
, s
->loop_depth
);
3116 /* We do not maintain predicates of inlined edges, free it. */
3117 edge_set_predicate (edge
, &true_p
);
3118 /* Similarly remove param summaries. */
3119 es
->param
.release ();
3120 operand_map
.release ();
3121 offset_map
.release ();
3124 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3125 and time. Recompute it. */
3128 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3130 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
/* Sum the body's size/time table, then add the cost of all calls with an
   all-true clause (only false_condition excluded).  */
3136 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3138 info
->size
+= e
->size
;
3139 info
->time
+= e
->time
;
3141 estimate_calls_size_and_time (node
, &info
->size
, &info
->min_size
,
3143 ~(clause_t
) (1 << predicate::false_condition
),
3144 vNULL
, vNULL
, vNULL
);
/* Round the scaled size back to instruction units.  */
3145 info
->size
= (info
->size
+ ipa_fn_summary::size_scale
/ 2) / ipa_fn_summary::size_scale
;
3149 /* This function performs intraprocedural analysis in NODE that is required to
3150 inline indirect calls. */
3153 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
/* Run IPA parameter/jump-function analysis, and dump it if requested.  */
3155 ipa_analyze_node (node
);
3156 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3158 ipa_print_node_params (dump_file
, node
);
3159 ipa_print_node_jump_functions (dump_file
, node
);
3164 /* Note function body size. */
3167 inline_analyze_function (struct cgraph_node
*node
)
3169 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3172 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3173 node
->name (), node
->order
)
;
/* Indirect-call analysis only makes sense for optimized non-thunks.  */
3174 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3175 inline_indirect_intraprocedural_analysis (node
);
3176 compute_fn_summary (node
, false);
/* For not-optimized functions, mark every edge uninlinable (the guard for
   this branch is elided from this chunk — presumably !optimize).  */
3179 struct cgraph_edge
*e
;
3180 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3181 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3182 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3183 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3190 /* Called when new function is inserted to callgraph late. */
/* Summary-manager insertion hook: analyze the newly inserted NODE so its
   fn summary exists.  */
3193 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3195 inline_analyze_function (node
);
3198 /* Generate function summaries for all defined functions: record
   versionability, allocate the summary holders, register cgraph hooks and
   analyze every eligible function body. */
3201 ipa_fn_summary_generate (void)
3203 struct cgraph_node
*node
;
3205 FOR_EACH_DEFINED_FUNCTION (node
)
3206 if (DECL_STRUCT_FUNCTION (node
->decl
))
3207 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
3209 ipa_fn_summary_alloc ();
3211 ipa_fn_summaries
->enable_insertion_hook ();
3213 ipa_register_cgraph_hooks ();
/* Analyze functions that are optimized, or that must be streamed for
   LTO/offloading regardless.  */
3215 FOR_EACH_DEFINED_FUNCTION (node
)
3217 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3218 || opt_for_fn (node
->decl
, optimize
)))
3219 inline_analyze_function (node
);
3223 /* Read ipa call summary for edge E from input block IB (the counterpart of
   write_ipa_call_summary).  When PREVAILS is false the data is consumed but
   discarded. */
3226 read_ipa_call_summary (struct lto_input_block
*ib
, struct cgraph_edge
*e
,
/* Only create a summary when this edge's node prevails in LTO linking.  */
3229 struct ipa_call_summary
*es
= prevails
3230 ? ipa_call_summaries
->get_create (e
) : NULL
;
3234 int size
= streamer_read_uhwi (ib
);
3235 int time
= streamer_read_uhwi (ib
);
3236 int depth
= streamer_read_uhwi (ib
);
3240 es
->call_stmt_size
= size
;
3241 es
->call_stmt_time
= time
;
3242 es
->loop_depth
= depth
;
3245 bitpack_d bp
= streamer_read_bitpack (ib
);
3247 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
/* Discard the bit when there is no summary to store it in.  */
3249 bp_unpack_value (&bp
, 1);
3253 edge_set_predicate (e
, &p
);
/* Per-parameter change probabilities follow; they are only kept when the
   call can actually be seen in this translation unit.  */
3254 length
= streamer_read_uhwi (ib
);
3255 if (length
&& es
&& e
->possibly_call_in_translation_unit_p ())
3257 es
->param
.safe_grow_cleared (length
);
3258 for (i
= 0; i
< length
; i
++)
3259 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
/* Otherwise skip over the streamed values to stay in sync.  */
3263 for (i
= 0; i
< length
; i
++)
3264 streamer_read_uhwi (ib
);
3269 /* Stream in inline summaries from the section. */
3272 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
/* Decode the LTO section layout: header, CFG part, main stream, strings.  */
3275 const struct lto_function_header
*header
=
3276 (const struct lto_function_header
*) data
;
3277 const int cfg_offset
= sizeof (struct lto_function_header
);
3278 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3279 const int string_offset
= main_offset
+ header
->main_size
;
3280 struct data_in
*data_in
;
3281 unsigned int i
, count2
, j
;
3282 unsigned int f_count
;
3284 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3285 file_data
->mode_table
);
3288 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3289 header
->string_size
, vNULL
);
/* One record per function; must mirror ipa_fn_summary_write exactly.  */
3290 f_count
= streamer_read_uhwi (&ib
);
3291 for (i
= 0; i
< f_count
; i
++)
3294 struct cgraph_node
*node
;
3295 struct ipa_fn_summary
*info
;
3296 lto_symtab_encoder_t encoder
;
3297 struct bitpack_d bp
;
3298 struct cgraph_edge
*e
;
3301 index
= streamer_read_uhwi (&ib
);
3302 encoder
= file_data
->symtab_node_encoder
;
3303 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
/* INFO stays NULL for non-prevailing nodes: data is read and dropped.  */
3305 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
3307 int stack_size
= streamer_read_uhwi (&ib
);
3308 int size
= streamer_read_uhwi (&ib
);
3309 sreal time
= sreal::stream_in (&ib
);
3313 info
->estimated_stack_size
3314 = info
->estimated_self_stack_size
= stack_size
;
3315 info
->size
= info
->self_size
= size
;
3319 bp
= streamer_read_bitpack (&ib
);
3322 info
->inlinable
= bp_unpack_value (&bp
, 1);
3323 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
/* Discard the bits for non-prevailing nodes.  */
3327 bp_unpack_value (&bp
, 1);
3328 bp_unpack_value (&bp
, 1);
/* Conditions referenced by the predicates below.  */
3331 count2
= streamer_read_uhwi (&ib
);
3332 gcc_assert (!info
|| !info
->conds
);
3333 for (j
= 0; j
< count2
; j
++)
3336 c
.operand_num
= streamer_read_uhwi (&ib
);
3337 c
.size
= streamer_read_uhwi (&ib
);
3338 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3339 c
.val
= stream_read_tree (&ib
, data_in
);
3340 bp
= streamer_read_bitpack (&ib
);
3341 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3342 c
.by_ref
= bp_unpack_value (&bp
, 1);
3344 c
.offset
= streamer_read_uhwi (&ib
);
3346 vec_safe_push (info
->conds
, c
);
/* Size/time table entries with their two predicates.  */
3348 count2
= streamer_read_uhwi (&ib
);
3349 gcc_assert (!info
|| !info
->size_time_table
);
3350 for (j
= 0; j
< count2
; j
++)
3352 struct size_time_entry e
;
3354 e
.size
= streamer_read_uhwi (&ib
);
3355 e
.time
= sreal::stream_in (&ib
);
3356 e
.exec_predicate
.stream_in (&ib
);
3357 e
.nonconst_predicate
.stream_in (&ib
);
3360 vec_safe_push (info
->size_time_table
, e
);
/* The three hint predicates, in the same order they were written.  */
3365 set_hint_predicate (&info
->loop_iterations
, p
);
3368 set_hint_predicate (&info
->loop_stride
, p
);
3371 set_hint_predicate (&info
->array_index
, p
);
3372 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3373 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3374 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3375 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3378 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3380 lto_data_in_delete (data_in
);
3384 /* Read inline summary. Jump functions are shared among ipa-cp
3385 and inliner, so when ipa-cp is active, we don't need to write them
3389 ipa_fn_summary_read (void)
3391 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3392 struct lto_file_decl_data
*file_data
;
3395 ipa_fn_summary_alloc ();
/* Read the fn-summary section from every LTO input file.  */
3397 while ((file_data
= file_data_vec
[j
++]))
3400 const char *data
= lto_get_section_data (file_data
,
3401 LTO_section_ipa_fn_summary
,
3404 inline_read_section (file_data
, data
, len
);
3406 /* Fatal error here. We do not want to support compiling ltrans units
3407 with different version of compiler or different flags than the WPA
3408 unit, so this should never happen. */
3409 fatal_error (input_location
,
3410 "ipa inline summary is missing in input file");
3412 ipa_register_cgraph_hooks ();
3414 ipa_prop_read_jump_functions ();
3416 gcc_assert (ipa_fn_summaries
);
3417 ipa_fn_summaries
->enable_insertion_hook ();
3421 /* Write inline summary for edge E to OB. */
/* Must stay in exact sync with read_ipa_call_summary.  */
3424 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3426 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3429 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3430 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3431 streamer_write_uhwi (ob
, es
->loop_depth
);
3433 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3434 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3435 streamer_write_bitpack (&bp
);
/* Predicate if present; a bare 0 otherwise marks its absence.  */
3438 es
->predicate
->stream_out (ob
);
3440 streamer_write_uhwi (ob
, 0);
3441 streamer_write_uhwi (ob
, es
->param
.length ());
3442 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3443 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3447 /* Write inline summary for node in SET.
3448 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3449 active, we don't need to write them twice. */
3452 ipa_fn_summary_write (void)
3454 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3455 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3456 unsigned int count
= 0;
/* First pass: count the function definitions we will stream.  */
3459 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3461 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3462 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3463 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3466 streamer_write_uhwi (ob
, count
);
/* Second pass: stream each record; format must mirror
   inline_read_section.  */
3468 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3470 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3471 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3472 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3474 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3475 struct bitpack_d bp
;
3476 struct cgraph_edge
*edge
;
3479 struct condition
*c
;
3481 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3482 streamer_write_hwi (ob
, info
->estimated_self_stack_size
);
3483 streamer_write_hwi (ob
, info
->self_size
);
3484 info
->time
.stream_out (ob
);
3485 bp
= bitpack_create (ob
->main_stream
);
3486 bp_pack_value (&bp
, info
->inlinable
, 1);
/* Placeholder bit kept for format compatibility.  */
3487 bp_pack_value (&bp
, false, 1);
3488 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3489 streamer_write_bitpack (&bp
);
/* Conditions used by the predicates streamed below.  */
3490 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3491 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3493 streamer_write_uhwi (ob
, c
->operand_num
);
3494 streamer_write_uhwi (ob
, c
->size
);
3495 streamer_write_uhwi (ob
, c
->code
);
3496 stream_write_tree (ob
, c
->val
, true);
3497 bp
= bitpack_create (ob
->main_stream
);
3498 bp_pack_value (&bp
, c
->agg_contents
, 1);
3499 bp_pack_value (&bp
, c
->by_ref
, 1);
3500 streamer_write_bitpack (&bp
);
3501 if (c
->agg_contents
)
3502 streamer_write_uhwi (ob
, c
->offset
);
/* Size/time table and the two predicates of each entry.  */
3504 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3505 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3507 streamer_write_uhwi (ob
, e
->size
);
3508 e
->time
.stream_out (ob
);
3509 e
->exec_predicate
.stream_out (ob
);
3510 e
->nonconst_predicate
.stream_out (ob
);
/* Hint predicates; 0 marks an absent one.  */
3512 if (info
->loop_iterations
)
3513 info
->loop_iterations
->stream_out (ob
);
3515 streamer_write_uhwi (ob
, 0);
3516 if (info
->loop_stride
)
3517 info
->loop_stride
->stream_out (ob
);
3519 streamer_write_uhwi (ob
, 0);
3520 if (info
->array_index
)
3521 info
->array_index
->stream_out (ob
);
3523 streamer_write_uhwi (ob
, 0);
3524 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3525 write_ipa_call_summary (ob
, edge
);
3526 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3527 write_ipa_call_summary (ob
, edge
);
3530 streamer_write_char_stream (ob
->main_stream
, 0);
3531 produce_asm (ob
, NULL
);
3532 destroy_output_block (ob
);
3535 ipa_prop_write_jump_functions ();
3539 /* Release inline summary. */
3542 ipa_free_fn_summary (void)
3544 struct cgraph_node
*node
;
/* Nothing to do if summaries were never allocated.  */
3545 if (!ipa_call_summaries
)
3547 FOR_EACH_DEFINED_FUNCTION (node
)
3549 ipa_fn_summaries
->remove (node
);
3550 ipa_fn_summaries
->release ();
3551 ipa_fn_summaries
= NULL
;
3552 ipa_call_summaries
->release ();
3553 delete ipa_call_summaries
;
3554 ipa_call_summaries
= NULL
;
3555 edge_predicate_pool
.release ();
/* Pass metadata for the per-function (GIMPLE) summary computation pass.  */
3560 const pass_data pass_data_local_fn_summary
=
3562 GIMPLE_PASS
, /* type */
3563 "local-fnsummary", /* name */
3564 OPTGROUP_INLINE
, /* optinfo_flags */
3565 TV_INLINE_PARAMETERS
, /* tv_id */
3566 0, /* properties_required */
3567 0, /* properties_provided */
3568 0, /* properties_destroyed */
3569 0, /* todo_flags_start */
3570 0, /* todo_flags_finish */
/* GIMPLE pass that computes the fn summary for the current function
   (drives compute_fn_summary_for_current).  */
3573 class pass_local_fn_summary
: public gimple_opt_pass
3576 pass_local_fn_summary (gcc::context
*ctxt
)
3577 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3580 /* opt_pass methods: */
3581 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3582 virtual unsigned int execute (function
*)
3584 return compute_fn_summary_for_current ();
3587 }; // class pass_local_fn_summary
/* Factory for the local-fnsummary pass.  */
3592 make_pass_local_fn_summary (gcc::context
*ctxt
)
3594 return new pass_local_fn_summary (ctxt
);
3598 /* Free inline summary. */
/* Pass metadata for the simple IPA pass that releases fn summaries.  */
3602 const pass_data pass_data_ipa_free_fn_summary
=
3604 SIMPLE_IPA_PASS
, /* type */
3605 "free-fnsummary", /* name */
3606 OPTGROUP_NONE
, /* optinfo_flags */
3607 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3608 0, /* properties_required */
3609 0, /* properties_provided */
3610 0, /* properties_destroyed */
3611 0, /* todo_flags_start */
3612 0, /* todo_flags_finish */
/* Simple IPA pass that calls ipa_free_fn_summary; gated on small_p or
   non-WPA (small_p is presumably set via set_pass_param — its
   declaration/assignment is elided from this chunk).  */
3615 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3618 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3619 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
3623 /* opt_pass methods: */
3624 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
3625 void set_pass_param (unsigned int n
, bool param
)
3627 gcc_assert (n
== 0);
3630 virtual bool gate (function
*) { return small_p
|| !flag_wpa
; }
3631 virtual unsigned int execute (function
*)
3633 ipa_free_fn_summary ();
3639 }; // class pass_ipa_free_fn_summary
/* Factory for the free-fnsummary pass.  */
3643 simple_ipa_opt_pass
*
3644 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3646 return new pass_ipa_free_fn_summary (ctxt
);
/* Pass metadata for the IPA fnsummary pass (generate/write/read hooks
   are wired up in the class below).  */
3651 const pass_data pass_data_ipa_fn_summary
=
3653 IPA_PASS
, /* type */
3654 "fnsummary", /* name */
3655 OPTGROUP_INLINE
, /* optinfo_flags */
3656 TV_IPA_FNSUMMARY
, /* tv_id */
3657 0, /* properties_required */
3658 0, /* properties_provided */
3659 0, /* properties_destroyed */
3660 0, /* todo_flags_start */
3661 ( TODO_dump_symtab
), /* todo_flags_finish */
/* IPA pass whose whole job is summary generation and LTO streaming;
   execute itself is a no-op.  */
3664 class pass_ipa_fn_summary
: public ipa_opt_pass_d
3667 pass_ipa_fn_summary (gcc::context
*ctxt
)
3668 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
3669 ipa_fn_summary_generate
, /* generate_summary */
3670 ipa_fn_summary_write
, /* write_summary */
3671 ipa_fn_summary_read
, /* read_summary */
3672 NULL
, /* write_optimization_summary */
3673 NULL
, /* read_optimization_summary */
3674 NULL
, /* stmt_fixup */
3675 0, /* function_transform_todo_flags_start */
3676 NULL
, /* function_transform */
3677 NULL
) /* variable_transform */
3680 /* opt_pass methods: */
3681 virtual unsigned int execute (function
*) { return 0; }
3683 }; // class pass_ipa_fn_summary
/* Factory for the fnsummary IPA pass.  */
3688 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
3690 return new pass_ipa_fn_summary (ctxt
);
3693 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3694 within the same process. For use by toplev::finalize. */
3697 ipa_fnsummary_c_finalize (void)
3699 ipa_free_fn_summary ();