1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
88 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
89 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
91 /* Edge predicates goes here. */
92 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
97 ipa_dump_hints (FILE *f
, ipa_hints hints
)
101 fprintf (f
, "IPA hints:");
102 if (hints
& INLINE_HINT_indirect_call
)
104 hints
&= ~INLINE_HINT_indirect_call
;
105 fprintf (f
, " indirect_call");
107 if (hints
& INLINE_HINT_loop_iterations
)
109 hints
&= ~INLINE_HINT_loop_iterations
;
110 fprintf (f
, " loop_iterations");
112 if (hints
& INLINE_HINT_loop_stride
)
114 hints
&= ~INLINE_HINT_loop_stride
;
115 fprintf (f
, " loop_stride");
117 if (hints
& INLINE_HINT_same_scc
)
119 hints
&= ~INLINE_HINT_same_scc
;
120 fprintf (f
, " same_scc");
122 if (hints
& INLINE_HINT_in_scc
)
124 hints
&= ~INLINE_HINT_in_scc
;
125 fprintf (f
, " in_scc");
127 if (hints
& INLINE_HINT_cross_module
)
129 hints
&= ~INLINE_HINT_cross_module
;
130 fprintf (f
, " cross_module");
132 if (hints
& INLINE_HINT_declared_inline
)
134 hints
&= ~INLINE_HINT_declared_inline
;
135 fprintf (f
, " declared_inline");
137 if (hints
& INLINE_HINT_known_hot
)
139 hints
&= ~INLINE_HINT_known_hot
;
140 fprintf (f
, " known_hot");
146 /* Record SIZE and TIME to SUMMARY.
147 The accounted code will be executed when EXEC_PRED is true.
148 When NONCONST_PRED is false the code will evaulate to constant and
149 will get optimized out in specialized clones of the function. */
152 ipa_fn_summary::account_size_time (int size
, sreal time
,
153 const predicate
&exec_pred
,
154 const predicate
&nonconst_pred_in
)
159 predicate nonconst_pred
;
161 if (exec_pred
== false)
164 nonconst_pred
= nonconst_pred_in
& exec_pred
;
166 if (nonconst_pred
== false)
169 /* We need to create initial empty unconitional clause, but otherwie
170 we don't need to account empty times and sizes. */
171 if (!size
&& time
== 0 && size_time_table
)
174 gcc_assert (time
>= 0);
176 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
177 if (e
->exec_predicate
== exec_pred
178 && e
->nonconst_predicate
== nonconst_pred
)
187 e
= &(*size_time_table
)[0];
188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
190 "\t\tReached limit on number of entries, "
191 "ignoring the predicate.");
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
196 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
197 ((double) size
) / ipa_fn_summary::size_scale
,
198 (time
.to_double ()), found
? "" : "new ");
199 exec_pred
.dump (dump_file
, conds
, 0);
200 if (exec_pred
!= nonconst_pred
)
202 fprintf (dump_file
, " nonconst:");
203 nonconst_pred
.dump (dump_file
, conds
);
206 fprintf (dump_file
, "\n");
210 class size_time_entry new_entry
;
211 new_entry
.size
= size
;
212 new_entry
.time
= time
;
213 new_entry
.exec_predicate
= exec_pred
;
214 new_entry
.nonconst_predicate
= nonconst_pred
;
215 vec_safe_push (size_time_table
, new_entry
);
224 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
226 static struct cgraph_edge
*
227 redirect_to_unreachable (struct cgraph_edge
*e
)
229 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
230 struct cgraph_node
*target
= cgraph_node::get_create
231 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
234 e
= e
->resolve_speculation (target
->decl
);
236 e
->make_direct (target
);
238 e
->redirect_callee (target
);
239 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
240 e
->inline_failed
= CIF_UNREACHABLE
;
241 e
->count
= profile_count::zero ();
242 es
->call_stmt_size
= 0;
243 es
->call_stmt_time
= 0;
245 callee
->remove_symbol_and_inline_clones ();
249 /* Set predicate for edge E. */
252 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
254 /* If the edge is determined to be never executed, redirect it
255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
257 if (predicate
&& *predicate
== false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e
->speculative
|| e
->callee
))
262 e
= redirect_to_unreachable (e
);
264 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
265 if (predicate
&& *predicate
!= true)
268 es
->predicate
= edge_predicate_pool
.allocate ();
269 *es
->predicate
= *predicate
;
274 edge_predicate_pool
.remove (es
->predicate
);
275 es
->predicate
= NULL
;
279 /* Set predicate for hint *P. */
282 set_hint_predicate (predicate
**p
, predicate new_predicate
)
284 if (new_predicate
== false || new_predicate
== true)
287 edge_predicate_pool
.remove (*p
);
293 *p
= edge_predicate_pool
.allocate ();
299 /* Compute what conditions may or may not hold given invormation about
300 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
301 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
302 copy when called in a given context. It is a bitmask of conditions. Bit
303 0 means that condition is known to be false, while bit 1 means that condition
304 may or may not be true. These differs - for example NOT_INLINED condition
305 is always false in the second and also builtin_constant_p tests cannot use
306 the fact that parameter is indeed a constant.
308 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
309 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
310 Return clause of possible truths. When INLINE_P is true, assume that we are
313 ERROR_MARK means compile time invariant. */
316 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
318 vec
<tree
> known_vals
,
319 vec
<ipa_agg_jump_function_p
>
321 clause_t
*ret_clause
,
322 clause_t
*ret_nonspec_clause
)
324 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
325 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
326 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
330 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
335 /* We allow call stmt to have fewer arguments than the callee function
336 (especially for K&R style programs). So bound check here (we assume
337 known_aggs vector, if non-NULL, has the same length as
339 gcc_checking_assert (!known_aggs
.exists ()
340 || (known_vals
.length () == known_aggs
.length ()));
341 if (c
->operand_num
>= (int) known_vals
.length ())
343 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
344 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
350 struct ipa_agg_jump_function
*agg
;
352 if (c
->code
== predicate::changed
354 && (known_vals
[c
->operand_num
] == error_mark_node
))
357 if (known_aggs
.exists ())
359 agg
= known_aggs
[c
->operand_num
];
360 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
361 c
->offset
, c
->by_ref
);
368 val
= known_vals
[c
->operand_num
];
369 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
375 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
376 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
379 if (c
->code
== predicate::changed
)
381 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
385 if (maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (val
))), c
->size
))
387 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
388 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
391 if (c
->code
== predicate::is_not_constant
)
393 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
397 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (c
->val
), val
);
399 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
402 if (res
&& integer_zerop (res
))
405 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
406 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
408 *ret_clause
= clause
;
409 if (ret_nonspec_clause
)
410 *ret_nonspec_clause
= nonspec_clause
;
414 /* Work out what conditions might be true at invocation of E. */
417 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
418 clause_t
*clause_ptr
,
419 clause_t
*nonspec_clause_ptr
,
420 vec
<tree
> *known_vals_ptr
,
421 vec
<ipa_polymorphic_call_context
>
423 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
425 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
426 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
427 vec
<tree
> known_vals
= vNULL
;
428 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
431 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
433 known_vals_ptr
->create (0);
434 if (known_contexts_ptr
)
435 known_contexts_ptr
->create (0);
437 if (ipa_node_params_sum
438 && !e
->call_stmt_cannot_inline_p
439 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
))
441 class ipa_node_params
*caller_parms_info
, *callee_pi
;
442 class ipa_edge_args
*args
= IPA_EDGE_REF (e
);
443 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
444 int i
, count
= ipa_get_cs_argument_count (args
);
446 if (e
->caller
->global
.inlined_to
)
447 caller_parms_info
= IPA_NODE_REF (e
->caller
->global
.inlined_to
);
449 caller_parms_info
= IPA_NODE_REF (e
->caller
);
450 callee_pi
= IPA_NODE_REF (e
->callee
);
452 if (count
&& (info
->conds
|| known_vals_ptr
))
453 known_vals
.safe_grow_cleared (count
);
454 if (count
&& (info
->conds
|| known_aggs_ptr
))
455 known_aggs
.safe_grow_cleared (count
);
456 if (count
&& known_contexts_ptr
)
457 known_contexts_ptr
->safe_grow_cleared (count
);
459 for (i
= 0; i
< count
; i
++)
461 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
462 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
463 ipa_get_type (callee_pi
, i
));
465 if (!cst
&& e
->call_stmt
466 && i
< (int)gimple_call_num_args (e
->call_stmt
))
468 cst
= gimple_call_arg (e
->call_stmt
, i
);
469 if (!is_gimple_min_invariant (cst
))
474 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
475 if (known_vals
.exists ())
478 else if (inline_p
&& !es
->param
[i
].change_prob
)
479 known_vals
[i
] = error_mark_node
;
481 if (known_contexts_ptr
)
482 (*known_contexts_ptr
)[i
]
483 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
484 /* TODO: When IPA-CP starts propagating and merging aggregate jump
485 functions, use its knowledge of the caller too, just like the
486 scalar case above. */
487 known_aggs
[i
] = &jf
->agg
;
490 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
491 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
493 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
495 if (count
&& (info
->conds
|| known_vals_ptr
))
496 known_vals
.safe_grow_cleared (count
);
497 for (i
= 0; i
< count
; i
++)
499 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
500 if (!is_gimple_min_invariant (cst
))
507 evaluate_conditions_for_known_args (callee
, inline_p
,
508 known_vals
, known_aggs
, clause_ptr
,
512 *known_vals_ptr
= known_vals
;
514 known_vals
.release ();
517 *known_aggs_ptr
= known_aggs
;
519 known_aggs
.release ();
523 /* Allocate the function summary. */
526 ipa_fn_summary_alloc (void)
528 gcc_checking_assert (!ipa_fn_summaries
);
529 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
530 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
533 ipa_call_summary::~ipa_call_summary ()
536 edge_predicate_pool
.remove (predicate
);
541 ipa_fn_summary::~ipa_fn_summary ()
544 edge_predicate_pool
.remove (loop_iterations
);
546 edge_predicate_pool
.remove (loop_stride
);
548 vec_free (size_time_table
);
552 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
555 for (e
= node
->callees
; e
; e
= e
->next_callee
)
556 ipa_call_summaries
->remove (e
);
557 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
558 ipa_call_summaries
->remove (e
);
561 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
562 Additionally care about allocating new memory slot for updated predicate
563 and set it to NULL when it becomes true or false (and thus uninteresting).
567 remap_hint_predicate_after_duplication (predicate
**p
,
568 clause_t possible_truths
)
570 predicate new_predicate
;
575 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
576 /* We do not want to free previous predicate; it is used by node origin. */
578 set_hint_predicate (p
, new_predicate
);
582 /* Hook that is called by cgraph.c when a node is duplicated. */
584 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
587 ipa_fn_summary
*info
)
589 new (info
) ipa_fn_summary (*ipa_fn_summaries
->get (src
));
590 /* TODO: as an optimization, we may avoid copying conditions
591 that are known to be false or true. */
592 info
->conds
= vec_safe_copy (info
->conds
);
594 /* When there are any replacements in the function body, see if we can figure
595 out that something was optimized out. */
596 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
598 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
599 /* Use SRC parm info since it may not be copied yet. */
600 class ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
601 vec
<tree
> known_vals
= vNULL
;
602 int count
= ipa_get_param_count (parms_info
);
604 clause_t possible_truths
;
605 predicate true_pred
= true;
607 int optimized_out_size
= 0;
608 bool inlined_to_p
= false;
609 struct cgraph_edge
*edge
, *next
;
611 info
->size_time_table
= 0;
612 known_vals
.safe_grow_cleared (count
);
613 for (i
= 0; i
< count
; i
++)
615 struct ipa_replace_map
*r
;
617 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
619 if (((!r
->old_tree
&& r
->parm_num
== i
)
620 || (r
->old_tree
&& r
->old_tree
== ipa_get_param (parms_info
, i
)))
621 && r
->replace_p
&& !r
->ref_p
)
623 known_vals
[i
] = r
->new_tree
;
628 evaluate_conditions_for_known_args (dst
, false,
632 /* We are going to specialize,
633 so ignore nonspec truths. */
635 known_vals
.release ();
637 info
->account_size_time (0, 0, true_pred
, true_pred
);
639 /* Remap size_time vectors.
640 Simplify the predicate by prunning out alternatives that are known
642 TODO: as on optimization, we can also eliminate conditions known
644 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
646 predicate new_exec_pred
;
647 predicate new_nonconst_pred
;
648 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
650 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
652 if (new_exec_pred
== false || new_nonconst_pred
== false)
653 optimized_out_size
+= e
->size
;
655 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
659 /* Remap edge predicates with the same simplification as above.
660 Also copy constantness arrays. */
661 for (edge
= dst
->callees
; edge
; edge
= next
)
663 predicate new_predicate
;
664 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
665 next
= edge
->next_callee
;
667 if (!edge
->inline_failed
)
671 new_predicate
= es
->predicate
->remap_after_duplication
673 if (new_predicate
== false && *es
->predicate
!= false)
674 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
675 edge_set_predicate (edge
, &new_predicate
);
678 /* Remap indirect edge predicates with the same simplificaiton as above.
679 Also copy constantness arrays. */
680 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
682 predicate new_predicate
;
683 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
684 next
= edge
->next_callee
;
686 gcc_checking_assert (edge
->inline_failed
);
689 new_predicate
= es
->predicate
->remap_after_duplication
691 if (new_predicate
== false && *es
->predicate
!= false)
692 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
693 edge_set_predicate (edge
, &new_predicate
);
695 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
697 remap_hint_predicate_after_duplication (&info
->loop_stride
,
700 /* If inliner or someone after inliner will ever start producing
701 non-trivial clones, we will get trouble with lack of information
702 about updating self sizes, because size vectors already contains
703 sizes of the calees. */
704 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
708 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
709 if (info
->loop_iterations
)
711 predicate p
= *info
->loop_iterations
;
712 info
->loop_iterations
= NULL
;
713 set_hint_predicate (&info
->loop_iterations
, p
);
715 if (info
->loop_stride
)
717 predicate p
= *info
->loop_stride
;
718 info
->loop_stride
= NULL
;
719 set_hint_predicate (&info
->loop_stride
, p
);
722 if (!dst
->global
.inlined_to
)
723 ipa_update_overall_fn_summary (dst
);
727 /* Hook that is called by cgraph.c when a node is duplicated. */
730 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
731 struct cgraph_edge
*dst
,
732 class ipa_call_summary
*srcinfo
,
733 class ipa_call_summary
*info
)
735 new (info
) ipa_call_summary (*srcinfo
);
736 info
->predicate
= NULL
;
737 edge_set_predicate (dst
, srcinfo
->predicate
);
738 info
->param
= srcinfo
->param
.copy ();
739 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
741 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
742 - eni_size_weights
.call_cost
);
743 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
744 - eni_time_weights
.call_cost
);
748 /* Dump edge summaries associated to NODE and recursively to all clones.
752 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
753 class ipa_fn_summary
*info
)
755 struct cgraph_edge
*edge
;
756 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
758 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
759 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
763 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
764 indent
, "", callee
->name (), callee
->order
,
766 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
767 indent
, "", es
->loop_depth
, edge
->sreal_frequency ().to_double (),
768 es
->call_stmt_size
, es
->call_stmt_time
);
770 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
772 fprintf (f
, "callee size:%2i stack:%2i",
773 (int) (s
->size
/ ipa_fn_summary::size_scale
),
774 (int) s
->estimated_stack_size
);
778 fprintf (f
, " predicate: ");
779 es
->predicate
->dump (f
, info
->conds
);
783 if (es
->param
.exists ())
784 for (i
= 0; i
< (int) es
->param
.length (); i
++)
786 int prob
= es
->param
[i
].change_prob
;
789 fprintf (f
, "%*s op%i is compile time invariant\n",
791 else if (prob
!= REG_BR_PROB_BASE
)
792 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
793 prob
* 100.0 / REG_BR_PROB_BASE
);
795 if (!edge
->inline_failed
)
797 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
798 fprintf (f
, "%*sStack frame offset %i, callee self size %i,"
801 (int) s
->stack_frame_offset
,
802 (int) s
->estimated_self_stack_size
,
803 (int) s
->estimated_stack_size
);
804 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
807 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
809 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
810 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
814 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
818 fprintf (f
, "predicate: ");
819 es
->predicate
->dump (f
, info
->conds
);
828 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
830 if (node
->definition
)
832 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
837 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
838 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
839 fprintf (f
, " always_inline");
841 fprintf (f
, " inlinable");
842 if (s
->fp_expressions
)
843 fprintf (f
, " fp_expression");
844 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
845 fprintf (f
, " self size: %i\n", s
->self_size
);
846 fprintf (f
, " global size: %i\n", s
->size
);
847 fprintf (f
, " min size: %i\n", s
->min_size
);
848 fprintf (f
, " self stack: %i\n",
849 (int) s
->estimated_self_stack_size
);
850 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
852 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
854 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
855 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
857 fprintf (f
, " size:%f, time:%f",
858 (double) e
->size
/ ipa_fn_summary::size_scale
,
859 e
->time
.to_double ());
860 if (e
->exec_predicate
!= true)
862 fprintf (f
, ", executed if:");
863 e
->exec_predicate
.dump (f
, s
->conds
, 0);
865 if (e
->exec_predicate
!= e
->nonconst_predicate
)
867 fprintf (f
, ", nonconst if:");
868 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
872 if (s
->loop_iterations
)
874 fprintf (f
, " loop iterations:");
875 s
->loop_iterations
->dump (f
, s
->conds
);
879 fprintf (f
, " loop stride:");
880 s
->loop_stride
->dump (f
, s
->conds
);
882 fprintf (f
, " calls:\n");
883 dump_ipa_call_summary (f
, 4, node
, s
);
887 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
892 ipa_debug_fn_summary (struct cgraph_node
*node
)
894 ipa_dump_fn_summary (stderr
, node
);
898 ipa_dump_fn_summaries (FILE *f
)
900 struct cgraph_node
*node
;
902 FOR_EACH_DEFINED_FUNCTION (node
)
903 if (!node
->global
.inlined_to
)
904 ipa_dump_fn_summary (f
, node
);
907 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
908 boolean variable pointed to by DATA. */
911 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
914 bool *b
= (bool *) data
;
919 /* If OP refers to value of function parameter, return the corresponding
920 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
921 PARM_DECL) will be stored to *SIZE_P in that case too. */
924 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
927 /* SSA_NAME referring to parm default def? */
928 if (TREE_CODE (op
) == SSA_NAME
929 && SSA_NAME_IS_DEFAULT_DEF (op
)
930 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
933 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
934 return SSA_NAME_VAR (op
);
936 /* Non-SSA parm reference? */
937 if (TREE_CODE (op
) == PARM_DECL
)
939 bool modified
= false;
942 ao_ref_init (&refd
, op
);
943 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
944 mark_modified
, &modified
, NULL
, NULL
,
945 fbi
->aa_walk_budget
+ 1);
948 fbi
->aa_walk_budget
= 0;
954 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
961 /* If OP refers to value of function parameter, return the corresponding
962 parameter. Also traverse chains of SSA register assignments. If non-NULL,
963 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
964 stored to *SIZE_P in that case too. */
967 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
970 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
974 if (TREE_CODE (op
) == SSA_NAME
975 && !SSA_NAME_IS_DEFAULT_DEF (op
)
976 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
977 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
978 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
983 /* If OP refers to a value of a function parameter or value loaded from an
984 aggregate passed to a parameter (either by value or reference), return TRUE
985 and store the number of the parameter to *INDEX_P, the access size into
986 *SIZE_P, and information whether and how it has been loaded from an
987 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
988 statement in which OP is used or loaded. */
991 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
992 gimple
*stmt
, tree op
, int *index_p
,
994 struct agg_position_info
*aggpos
)
996 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
998 gcc_checking_assert (aggpos
);
1001 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1004 aggpos
->agg_contents
= false;
1005 aggpos
->by_ref
= false;
1009 if (TREE_CODE (op
) == SSA_NAME
)
1011 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1012 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1014 stmt
= SSA_NAME_DEF_STMT (op
);
1015 op
= gimple_assign_rhs1 (stmt
);
1016 if (!REFERENCE_CLASS_P (op
))
1017 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1021 aggpos
->agg_contents
= true;
1022 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1023 stmt
, op
, index_p
, &aggpos
->offset
,
1024 size_p
, &aggpos
->by_ref
);
1027 /* See if statement might disappear after inlining.
1028 0 - means not eliminated
1029 1 - half of statements goes away
1030 2 - for sure it is eliminated.
1031 We are not terribly sophisticated, basically looking for simple abstraction
1032 penalty wrappers. */
1035 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1037 enum gimple_code code
= gimple_code (stmt
);
1038 enum tree_code rhs_code
;
1048 if (gimple_num_ops (stmt
) != 2)
1051 rhs_code
= gimple_assign_rhs_code (stmt
);
1053 /* Casts of parameters, loads from parameters passed by reference
1054 and stores to return value or parameters are often free after
1055 inlining dua to SRA and further combining.
1056 Assume that half of statements goes away. */
1057 if (CONVERT_EXPR_CODE_P (rhs_code
)
1058 || rhs_code
== VIEW_CONVERT_EXPR
1059 || rhs_code
== ADDR_EXPR
1060 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1062 tree rhs
= gimple_assign_rhs1 (stmt
);
1063 tree lhs
= gimple_assign_lhs (stmt
);
1064 tree inner_rhs
= get_base_address (rhs
);
1065 tree inner_lhs
= get_base_address (lhs
);
1066 bool rhs_free
= false;
1067 bool lhs_free
= false;
1074 /* Reads of parameter are expected to be free. */
1075 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1077 /* Match expressions of form &this->field. Those will most likely
1078 combine with something upstream after inlining. */
1079 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1081 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1082 if (TREE_CODE (op
) == PARM_DECL
)
1084 else if (TREE_CODE (op
) == MEM_REF
1085 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1090 /* When parameter is not SSA register because its address is taken
1091 and it is just copied into one, the statement will be completely
1092 free after inlining (we will copy propagate backward). */
1093 if (rhs_free
&& is_gimple_reg (lhs
))
1096 /* Reads of parameters passed by reference
1097 expected to be free (i.e. optimized out after inlining). */
1098 if (TREE_CODE (inner_rhs
) == MEM_REF
1099 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1102 /* Copying parameter passed by reference into gimple register is
1103 probably also going to copy propagate, but we can't be quite
1105 if (rhs_free
&& is_gimple_reg (lhs
))
1108 /* Writes to parameters, parameters passed by value and return value
1109 (either dirrectly or passed via invisible reference) are free.
1111 TODO: We ought to handle testcase like
1112 struct a {int a,b;};
1114 retrurnsturct (void)
1120 This translate into:
1135 For that we either need to copy ipa-split logic detecting writes
1137 if (TREE_CODE (inner_lhs
) == PARM_DECL
1138 || TREE_CODE (inner_lhs
) == RESULT_DECL
1139 || (TREE_CODE (inner_lhs
) == MEM_REF
1140 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1142 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1143 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1144 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1146 0))) == RESULT_DECL
))))
1149 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1151 if (lhs_free
&& rhs_free
)
1161 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1162 predicates to the CFG edges. */
1165 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1166 class ipa_fn_summary
*summary
,
1173 struct agg_position_info aggpos
;
1174 enum tree_code code
, inverted_code
;
1180 last
= last_stmt (bb
);
1181 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1183 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1185 op
= gimple_cond_lhs (last
);
1186 /* TODO: handle conditionals like
1189 if (unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1191 code
= gimple_cond_code (last
);
1192 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1194 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1196 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1197 ? code
: inverted_code
);
1198 /* invert_tree_comparison will return ERROR_MARK on FP
1199 comparsions that are not EQ/NE instead of returning proper
1200 unordered one. Be sure it is not confused with NON_CONSTANT. */
1201 if (this_code
!= ERROR_MARK
)
1204 = add_condition (summary
, index
, size
, &aggpos
, this_code
,
1205 unshare_expr_without_location
1206 (gimple_cond_rhs (last
)));
1207 e
->aux
= edge_predicate_pool
.allocate ();
1208 *(predicate
*) e
->aux
= p
;
1213 if (TREE_CODE (op
) != SSA_NAME
)
1216 if (builtin_constant_p (op))
1220 Here we can predicate nonconstant_code. We can't
1221 really handle constant_code since we have no predicate
1222 for this and also the constant code is not known to be
1223 optimized away when inliner doen't see operand is constant.
1224 Other optimizers might think otherwise. */
1225 if (gimple_cond_code (last
) != NE_EXPR
1226 || !integer_zerop (gimple_cond_rhs (last
)))
1228 set_stmt
= SSA_NAME_DEF_STMT (op
);
1229 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1230 || gimple_call_num_args (set_stmt
) != 1)
1232 op2
= gimple_call_arg (set_stmt
, 0);
1233 if (!unmodified_parm_or_parm_agg_item (fbi
, set_stmt
, op2
, &index
, &size
,
1236 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1238 predicate p
= add_condition (summary
, index
, size
, &aggpos
,
1239 predicate::is_not_constant
, NULL_TREE
);
1240 e
->aux
= edge_predicate_pool
.allocate ();
1241 *(predicate
*) e
->aux
= p
;
1246 /* If BB ends by a switch we can turn into predicates, attach corresponding
1247 predicates to the CFG edges. */
1250 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1251 class ipa_fn_summary
*summary
,
1258 struct agg_position_info aggpos
;
1264 lastg
= last_stmt (bb
);
1265 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1267 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1268 op
= gimple_switch_index (last
);
1269 if (!unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1272 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1274 e
->aux
= edge_predicate_pool
.allocate ();
1275 *(predicate
*) e
->aux
= false;
1277 n
= gimple_switch_num_labels (last
);
1278 for (case_idx
= 0; case_idx
< n
; ++case_idx
)
1280 tree cl
= gimple_switch_label (last
, case_idx
);
1284 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1285 min
= CASE_LOW (cl
);
1286 max
= CASE_HIGH (cl
);
1288 /* For default we might want to construct predicate that none
1289 of cases is met, but it is bit hard to do not having negations
1290 of conditionals handy. */
1294 p
= add_condition (summary
, index
, size
, &aggpos
, EQ_EXPR
,
1295 unshare_expr_without_location (min
));
1299 p1
= add_condition (summary
, index
, size
, &aggpos
, GE_EXPR
,
1300 unshare_expr_without_location (min
));
1301 p2
= add_condition (summary
, index
, size
, &aggpos
, LE_EXPR
,
1302 unshare_expr_without_location (max
));
1305 *(class predicate
*) e
->aux
1306 = p
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1311 /* For each BB in NODE attach to its AUX pointer predicate under
1312 which it is executable. */
1315 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1316 struct cgraph_node
*node
,
1317 class ipa_fn_summary
*summary
)
1319 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1323 FOR_EACH_BB_FN (bb
, my_function
)
1325 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1326 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1329 /* Entry block is always executable. */
1330 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1331 = edge_predicate_pool
.allocate ();
1332 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1334 /* A simple dataflow propagation of predicates forward in the CFG.
1335 TODO: work in reverse postorder. */
1339 FOR_EACH_BB_FN (bb
, my_function
)
1341 predicate p
= false;
1344 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1348 predicate this_bb_predicate
1349 = *(predicate
*) e
->src
->aux
;
1351 this_bb_predicate
&= (*(class predicate
*) e
->aux
);
1352 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1358 gcc_checking_assert (!bb
->aux
);
1364 bb
->aux
= edge_predicate_pool
.allocate ();
1365 *((predicate
*) bb
->aux
) = p
;
1367 else if (p
!= *(predicate
*) bb
->aux
)
1369 /* This OR operation is needed to ensure monotonous data flow
1370 in the case we hit the limit on number of clauses and the
1371 and/or operations above give approximate answers. */
1372 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1373 if (p
!= *(predicate
*) bb
->aux
)
1376 *((predicate
*) bb
->aux
) = p
;
1385 /* Return predicate specifying when the STMT might have result that is not
1386 a compile time constant. */
1389 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1390 class ipa_fn_summary
*summary
,
1392 vec
<predicate
> nonconstant_names
)
1398 while (UNARY_CLASS_P (expr
))
1399 expr
= TREE_OPERAND (expr
, 0);
1401 parm
= unmodified_parm (fbi
, NULL
, expr
, &size
);
1402 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1403 return add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1405 if (is_gimple_min_invariant (expr
))
1407 if (TREE_CODE (expr
) == SSA_NAME
)
1408 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1409 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1412 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1413 TREE_OPERAND (expr
, 0),
1419 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1420 TREE_OPERAND (expr
, 1),
1422 return p1
.or_with (summary
->conds
, p2
);
1424 else if (TREE_CODE (expr
) == COND_EXPR
)
1427 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1428 TREE_OPERAND (expr
, 0),
1434 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1435 TREE_OPERAND (expr
, 1),
1439 p1
= p1
.or_with (summary
->conds
, p2
);
1440 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1441 TREE_OPERAND (expr
, 2),
1443 return p2
.or_with (summary
->conds
, p1
);
1445 else if (TREE_CODE (expr
) == CALL_EXPR
)
1456 /* Return predicate specifying when the STMT might have result that is not
1457 a compile time constant. */
1460 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1461 class ipa_fn_summary
*summary
,
1463 vec
<predicate
> nonconstant_names
)
1468 predicate op_non_const
;
1472 struct agg_position_info aggpos
;
1474 /* What statments might be optimized away
1475 when their arguments are constant. */
1476 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1477 && gimple_code (stmt
) != GIMPLE_COND
1478 && gimple_code (stmt
) != GIMPLE_SWITCH
1479 && (gimple_code (stmt
) != GIMPLE_CALL
1480 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1483 /* Stores will stay anyway. */
1484 if (gimple_store_p (stmt
))
1487 is_load
= gimple_assign_load_p (stmt
);
1489 /* Loads can be optimized when the value is known. */
1493 gcc_assert (gimple_assign_single_p (stmt
));
1494 op
= gimple_assign_rhs1 (stmt
);
1495 if (!unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, &base_index
, &size
,
1502 /* See if we understand all operands before we start
1503 adding conditionals. */
1504 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1506 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1507 /* For arguments we can build a condition. */
1508 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1510 if (TREE_CODE (use
) != SSA_NAME
)
1512 /* If we know when operand is constant,
1513 we still can say something useful. */
1514 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1521 add_condition (summary
, base_index
, size
, &aggpos
, predicate::changed
,
1524 op_non_const
= false;
1525 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1528 tree parm
= unmodified_parm (fbi
, stmt
, use
, &size
);
1531 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1533 if (index
!= base_index
)
1534 p
= add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1540 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1541 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1543 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1544 && gimple_op (stmt
, 0)
1545 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1546 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1548 return op_non_const
;
1551 struct record_modified_bb_info
1558 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1559 probability how often it changes between USE_BB.
1560 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1561 is in different loop nest, we can do better.
1562 This is all just estimate. In theory we look for minimal cut separating
1563 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1567 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1569 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1570 if (l
&& l
->header
->count
< init_bb
->count
)
1575 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1576 set except for info->stmt. */
1579 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1581 struct record_modified_bb_info
*info
=
1582 (struct record_modified_bb_info
*) data
;
1583 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1585 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
1587 bitmap_set_bit (info
->bb_set
,
1588 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1589 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1591 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1592 gimple_bb (info
->stmt
))->index
);
1595 fprintf (dump_file
, " Param ");
1596 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1597 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1598 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1600 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1601 gimple_bb (info
->stmt
))->index
);
1602 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
1607 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1608 will change since last invocation of STMT.
1610 Value 0 is reserved for compile time invariants.
1611 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1612 ought to be REG_BR_PROB_BASE / estimated_iters. */
1615 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
1617 tree op
= gimple_call_arg (stmt
, i
);
1618 basic_block bb
= gimple_bb (stmt
);
1620 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1621 op
= TREE_OPERAND (op
, 0);
1623 tree base
= get_base_address (op
);
1625 /* Global invariants never change. */
1626 if (is_gimple_min_invariant (base
))
1629 /* We would have to do non-trivial analysis to really work out what
1630 is the probability of value to change (i.e. when init statement
1631 is in a sibling loop of the call).
1633 We do an conservative estimate: when call is executed N times more often
1634 than the statement defining value, we take the frequency 1/N. */
1635 if (TREE_CODE (base
) == SSA_NAME
)
1637 profile_count init_count
;
1639 if (!bb
->count
.nonzero_p ())
1640 return REG_BR_PROB_BASE
;
1642 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1643 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1645 init_count
= get_minimal_bb
1646 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1647 gimple_bb (stmt
))->count
;
1649 if (init_count
< bb
->count
)
1650 return MAX ((init_count
.to_sreal_scale (bb
->count
)
1651 * REG_BR_PROB_BASE
).to_int (), 1);
1652 return REG_BR_PROB_BASE
;
1657 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1658 struct record_modified_bb_info info
;
1659 tree init
= ctor_for_folding (base
);
1661 if (init
!= error_mark_node
)
1663 if (!bb
->count
.nonzero_p ())
1664 return REG_BR_PROB_BASE
;
1667 fprintf (dump_file
, " Analyzing param change probablity of ");
1668 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1669 fprintf (dump_file
, "\n");
1671 ao_ref_init (&refd
, op
);
1674 info
.bb_set
= BITMAP_ALLOC (NULL
);
1676 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1677 NULL
, NULL
, fbi
->aa_walk_budget
);
1678 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
1683 fprintf (dump_file
, " Ran out of AA walking budget.\n");
1685 fprintf (dump_file
, " Set in same BB as used.\n");
1687 BITMAP_FREE (info
.bb_set
);
1688 return REG_BR_PROB_BASE
;
1693 /* Lookup the most frequent update of the value and believe that
1694 it dominates all the other; precise analysis here is difficult. */
1695 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
1696 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
1699 fprintf (dump_file
, " Set with count ");
1700 max
.dump (dump_file
);
1701 fprintf (dump_file
, " and used with count ");
1702 bb
->count
.dump (dump_file
);
1703 fprintf (dump_file
, " freq %f\n",
1704 max
.to_sreal_scale (bb
->count
).to_double ());
1707 BITMAP_FREE (info
.bb_set
);
1708 if (max
< bb
->count
)
1709 return MAX ((max
.to_sreal_scale (bb
->count
)
1710 * REG_BR_PROB_BASE
).to_int (), 1);
1711 return REG_BR_PROB_BASE
;
1715 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1716 sub-graph and if the predicate the condition depends on is known. If so,
1717 return true and store the pointer the predicate in *P. */
1720 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
1721 ipa_fn_summary
*summary
, basic_block bb
,
1723 vec
<predicate
> nonconstant_names
)
1727 basic_block first_bb
= NULL
;
1730 if (single_pred_p (bb
))
1736 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1738 if (single_succ_p (e
->src
))
1740 if (!single_pred_p (e
->src
))
1743 first_bb
= single_pred (e
->src
);
1744 else if (single_pred (e
->src
) != first_bb
)
1751 else if (e
->src
!= first_bb
)
1759 stmt
= last_stmt (first_bb
);
1761 || gimple_code (stmt
) != GIMPLE_COND
1762 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
1765 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1766 gimple_cond_lhs (stmt
),
1774 /* Given a PHI statement in a function described by inline properties SUMMARY
1775 and *P being the predicate describing whether the selected PHI argument is
1776 known, store a predicate for the result of the PHI statement into
1777 NONCONSTANT_NAMES, if possible. */
1780 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
1782 vec
<predicate
> nonconstant_names
)
1786 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
1788 tree arg
= gimple_phi_arg (phi
, i
)->def
;
1789 if (!is_gimple_min_invariant (arg
))
1791 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
1792 *p
= p
->or_with (summary
->conds
,
1793 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
1799 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1801 fprintf (dump_file
, "\t\tphi predicate: ");
1802 p
->dump (dump_file
, summary
->conds
);
1804 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
1807 /* For a typical usage of __builtin_expect (a<b, 1), we
1808 may introduce an extra relation stmt:
1809 With the builtin, we have
1812 t3 = __builtin_expect (t2, 1);
1815 Without the builtin, we have
1818 This affects the size/time estimation and may have
1819 an impact on the earlier inlining.
1820 Here find this pattern and fix it up later. */
1823 find_foldable_builtin_expect (basic_block bb
)
1825 gimple_stmt_iterator bsi
;
1827 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1829 gimple
*stmt
= gsi_stmt (bsi
);
1830 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
1831 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
1832 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
1834 tree var
= gimple_call_lhs (stmt
);
1835 tree arg
= gimple_call_arg (stmt
, 0);
1836 use_operand_p use_p
;
1843 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
1845 while (TREE_CODE (arg
) == SSA_NAME
)
1847 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
1848 if (!is_gimple_assign (stmt_tmp
))
1850 switch (gimple_assign_rhs_code (stmt_tmp
))
1869 arg
= gimple_assign_rhs1 (stmt_tmp
);
1872 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
1873 && gimple_code (use_stmt
) == GIMPLE_COND
)
1880 /* Return true when the basic blocks contains only clobbers followed by RESX.
1881 Such BBs are kept around to make removal of dead stores possible with
1882 presence of EH and will be optimized out by optimize_clobbers later in the
1885 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1886 that can be clobber only, too.. When it is false, the RESX is not necessary
1887 on the end of basic block. */
1890 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
1892 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1898 if (gsi_end_p (gsi
))
1900 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
1904 else if (!single_succ_p (bb
))
1907 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
1909 gimple
*stmt
= gsi_stmt (gsi
);
1910 if (is_gimple_debug (stmt
))
1912 if (gimple_clobber_p (stmt
))
1914 if (gimple_code (stmt
) == GIMPLE_LABEL
)
1919 /* See if all predecestors are either throws or clobber only BBs. */
1920 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1921 if (!(e
->flags
& EDGE_EH
)
1922 && !clobber_only_eh_bb_p (e
->src
, false))
1928 /* Return true if STMT compute a floating point expression that may be affected
1929 by -ffast-math and similar flags. */
1932 fp_expression_p (gimple
*stmt
)
1937 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
1938 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
1943 /* Analyze function body for NODE.
1944 EARLY indicates run from early optimization pipeline. */
1947 analyze_function_body (struct cgraph_node
*node
, bool early
)
1949 sreal time
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
);
1950 /* Estimate static overhead for function prologue/epilogue and alignment. */
1951 int size
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
);
1952 /* Benefits are scaled by probability of elimination that is in range
1955 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1957 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
1958 predicate bb_predicate
;
1959 struct ipa_func_body_info fbi
;
1960 vec
<predicate
> nonconstant_names
= vNULL
;
1963 gimple
*fix_builtin_expect_stmt
;
1965 gcc_assert (my_function
&& my_function
->cfg
);
1966 gcc_assert (cfun
== my_function
);
1968 memset(&fbi
, 0, sizeof(fbi
));
1969 vec_free (info
->conds
);
1971 vec_free (info
->size_time_table
);
1972 info
->size_time_table
= NULL
;
1974 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
1975 so we can produce proper inline hints.
1977 When optimizing and analyzing for early inliner, initialize node params
1978 so we can produce correct BB predicates. */
1980 if (opt_for_fn (node
->decl
, optimize
))
1982 calculate_dominance_info (CDI_DOMINATORS
);
1984 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
1987 ipa_check_create_node_params ();
1988 ipa_initialize_node_params (node
);
1991 if (ipa_node_params_sum
)
1994 fbi
.info
= IPA_NODE_REF (node
);
1995 fbi
.bb_infos
= vNULL
;
1996 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
1997 fbi
.param_count
= count_formal_params (node
->decl
);
1998 fbi
.aa_walk_budget
= PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS
);
2000 nonconstant_names
.safe_grow_cleared
2001 (SSANAMES (my_function
)->length ());
2006 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2009 /* When we run into maximal number of entries, we assign everything to the
2010 constant truth case. Be sure to have it in list. */
2011 bb_predicate
= true;
2012 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2014 bb_predicate
= predicate::not_inlined ();
2015 info
->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
)
2016 * ipa_fn_summary::size_scale
,
2017 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
),
2022 compute_bb_predicates (&fbi
, node
, info
);
2023 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2024 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2025 for (n
= 0; n
< nblocks
; n
++)
2027 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2028 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2029 if (clobber_only_eh_bb_p (bb
))
2031 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2032 fprintf (dump_file
, "\n Ignoring BB %i;"
2033 " it will be optimized away by cleanup_clobbers\n",
2038 /* TODO: Obviously predicates can be propagated down across CFG. */
2042 bb_predicate
= *(predicate
*) bb
->aux
;
2044 bb_predicate
= false;
2047 bb_predicate
= true;
2049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2051 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2052 bb_predicate
.dump (dump_file
, info
->conds
);
2055 if (fbi
.info
&& nonconstant_names
.exists ())
2057 predicate phi_predicate
;
2058 bool first_phi
= true;
2060 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2064 && !phi_result_unknown_predicate (&fbi
, info
, bb
,
2069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2071 fprintf (dump_file
, " ");
2072 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2074 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2079 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2081 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2082 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2084 gimple
*stmt
= gsi_stmt (bsi
);
2085 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2086 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2088 predicate will_be_nonconstant
;
2090 /* This relation stmt should be folded after we remove
2091 buildin_expect call. Adjust the cost here. */
2092 if (stmt
== fix_builtin_expect_stmt
)
2098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2100 fprintf (dump_file
, " ");
2101 print_gimple_stmt (dump_file
, stmt
, 0);
2102 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2103 freq
.to_double (), this_size
,
2107 if (is_gimple_call (stmt
)
2108 && !gimple_call_internal_p (stmt
))
2110 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2111 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2113 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2114 resolved as constant. We however don't want to optimize
2115 out the cgraph edges. */
2116 if (nonconstant_names
.exists ()
2117 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2118 && gimple_call_lhs (stmt
)
2119 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2121 predicate false_p
= false;
2122 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2125 if (ipa_node_params_sum
)
2127 int count
= gimple_call_num_args (stmt
);
2131 es
->param
.safe_grow_cleared (count
);
2132 for (i
= 0; i
< count
; i
++)
2134 int prob
= param_change_prob (&fbi
, stmt
, i
);
2135 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2136 es
->param
[i
].change_prob
= prob
;
2140 es
->call_stmt_size
= this_size
;
2141 es
->call_stmt_time
= this_time
;
2142 es
->loop_depth
= bb_loop_depth (bb
);
2143 edge_set_predicate (edge
, &bb_predicate
);
2144 if (edge
->speculative
)
2146 cgraph_edge
*direct
, *indirect
;
2148 edge
->speculative_call_info (direct
, indirect
, ref
);
2149 gcc_assert (direct
== edge
);
2150 ipa_call_summary
*es2
2151 = ipa_call_summaries
->get_create (indirect
);
2152 ipa_call_summaries
->duplicate (edge
, indirect
,
2157 /* TODO: When conditional jump or swithc is known to be constant, but
2158 we did not translate it into the predicates, we really can account
2159 just maximum of the possible paths. */
2162 = will_be_nonconstant_predicate (&fbi
, info
,
2163 stmt
, nonconstant_names
);
2165 will_be_nonconstant
= true;
2166 if (this_time
|| this_size
)
2168 sreal final_time
= (sreal
)this_time
* freq
;
2170 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2171 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2173 "\t\t50%% will be eliminated by inlining\n");
2174 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2175 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2177 class predicate p
= bb_predicate
& will_be_nonconstant
;
2179 /* We can ignore statement when we proved it is never going
2180 to happen, but we cannot do that for call statements
2181 because edges are accounted specially. */
2183 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2189 /* We account everything but the calls. Calls have their own
2190 size/time info attached to cgraph edges. This is necessary
2191 in order to make the cost disappear after inlining. */
2192 if (!is_gimple_call (stmt
))
2196 predicate ip
= bb_predicate
& predicate::not_inlined ();
2197 info
->account_size_time (this_size
* prob
,
2198 (final_time
* prob
) / 2, ip
,
2202 info
->account_size_time (this_size
* (2 - prob
),
2203 (final_time
* (2 - prob
) / 2),
2208 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2210 info
->fp_expressions
= true;
2212 fprintf (dump_file
, " fp_expression set\n");
2216 /* Account cost of address calculations in the statements. */
2217 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2219 for (tree op
= gimple_op (stmt
, i
);
2220 op
&& handled_component_p (op
);
2221 op
= TREE_OPERAND (op
, 0))
2222 if ((TREE_CODE (op
) == ARRAY_REF
2223 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2224 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2226 predicate p
= bb_predicate
;
2228 p
= p
& will_be_nonconstant_expr_predicate
2229 (&fbi
, info
, TREE_OPERAND (op
, 1),
2237 "\t\tAccounting address calculation.\n");
2238 info
->account_size_time (ipa_fn_summary::size_scale
,
2250 if (nonconstant_names
.exists () && !early
)
2253 predicate loop_iterations
= true;
2254 predicate loop_stride
= true;
2256 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2257 flow_loops_dump (dump_file
, NULL
, 0);
2259 FOR_EACH_LOOP (loop
, 0)
2264 class tree_niter_desc niter_desc
;
2265 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2267 exits
= get_loop_exit_edges (loop
);
2268 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2269 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2270 && !is_gimple_min_invariant (niter_desc
.niter
))
2272 predicate will_be_nonconstant
2273 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2276 if (will_be_nonconstant
!= true)
2277 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2278 if (will_be_nonconstant
!= true
2279 && will_be_nonconstant
!= false)
2280 /* This is slightly inprecise. We may want to represent each
2281 loop with independent predicate. */
2282 loop_iterations
&= will_be_nonconstant
;
2287 /* To avoid quadratic behavior we analyze stride predicates only
2288 with respect to the containing loop. Thus we simply iterate
2289 over all defs in the outermost loop body. */
2290 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2291 loop
!= NULL
; loop
= loop
->next
)
2293 basic_block
*body
= get_loop_body (loop
);
2294 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2296 gimple_stmt_iterator gsi
;
2297 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2298 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2301 gimple
*stmt
= gsi_stmt (gsi
);
2303 if (!is_gimple_assign (stmt
))
2306 tree def
= gimple_assign_lhs (stmt
);
2307 if (TREE_CODE (def
) != SSA_NAME
)
2311 if (!simple_iv (loop_containing_stmt (stmt
),
2312 loop_containing_stmt (stmt
),
2314 || is_gimple_min_invariant (iv
.step
))
2317 predicate will_be_nonconstant
2318 = will_be_nonconstant_expr_predicate (&fbi
, info
, iv
.step
,
2320 if (will_be_nonconstant
!= true)
2321 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2322 if (will_be_nonconstant
!= true
2323 && will_be_nonconstant
!= false)
2324 /* This is slightly inprecise. We may want to represent
2325 each loop with independent predicate. */
2326 loop_stride
= loop_stride
& will_be_nonconstant
;
2331 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2332 set_hint_predicate (&s
->loop_iterations
, loop_iterations
);
2333 set_hint_predicate (&s
->loop_stride
, loop_stride
);
2336 FOR_ALL_BB_FN (bb
, my_function
)
2342 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2344 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2347 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2351 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2353 s
->self_size
= size
;
2354 nonconstant_names
.release ();
2355 ipa_release_body_info (&fbi
);
2356 if (opt_for_fn (node
->decl
, optimize
))
2359 loop_optimizer_finalize ();
2360 else if (!ipa_edge_args_sum
)
2361 ipa_free_all_node_params ();
2362 free_dominance_info (CDI_DOMINATORS
);
2366 fprintf (dump_file
, "\n");
2367 ipa_dump_fn_summary (dump_file
, node
);
2372 /* Compute function summary.
2373 EARLY is true when we compute parameters during early opts. */
2376 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2378 HOST_WIDE_INT self_stack_size
;
2379 struct cgraph_edge
*e
;
2380 class ipa_fn_summary
*info
;
2382 gcc_assert (!node
->global
.inlined_to
);
2384 if (!ipa_fn_summaries
)
2385 ipa_fn_summary_alloc ();
2387 /* Create a new ipa_fn_summary. */
2388 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
2389 ipa_fn_summaries
->remove (node
);
2390 info
= ipa_fn_summaries
->get_create (node
);
2392 /* Estimate the stack size for the function if we're optimizing. */
2393 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2394 ? estimated_stack_frame_size (node
) : 0;
2395 info
->estimated_self_stack_size
= self_stack_size
;
2396 info
->estimated_stack_size
= self_stack_size
;
2397 info
->stack_frame_offset
= 0;
2399 if (node
->thunk
.thunk_p
)
2401 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
2404 node
->local
.can_change_signature
= false;
2405 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2406 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2407 info
->account_size_time (ipa_fn_summary::size_scale
2409 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS
),
2411 (PARAM_UNINLINED_FUNCTION_THUNK_TIME
), t
, t
);
2412 t
= predicate::not_inlined ();
2413 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2414 ipa_update_overall_fn_summary (node
);
2415 info
->self_size
= info
->size
;
2416 if (stdarg_p (TREE_TYPE (node
->decl
)))
2418 info
->inlinable
= false;
2419 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2422 info
->inlinable
= true;
2426 /* Even is_gimple_min_invariant rely on current_function_decl. */
2427 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2429 /* Can this function be inlined at all? */
2430 if (!opt_for_fn (node
->decl
, optimize
)
2431 && !lookup_attribute ("always_inline",
2432 DECL_ATTRIBUTES (node
->decl
)))
2433 info
->inlinable
= false;
2435 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2437 /* Type attributes can use parameter indices to describe them. */
2438 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2439 /* Likewise for #pragma omp declare simd functions or functions
2440 with simd attribute. */
2441 || lookup_attribute ("omp declare simd",
2442 DECL_ATTRIBUTES (node
->decl
)))
2443 node
->local
.can_change_signature
= false;
2446 /* Otherwise, inlinable functions always can change signature. */
2447 if (info
->inlinable
)
2448 node
->local
.can_change_signature
= true;
2451 /* Functions calling builtin_apply cannot change signature. */
2452 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2454 tree
cdecl = e
->callee
->decl
;
2455 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
2456 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
2459 node
->local
.can_change_signature
= !e
;
2462 analyze_function_body (node
, early
);
2465 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2466 if (e
->callee
->comdat_local_p ())
2468 node
->calls_comdat_local
= (e
!= NULL
);
2470 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2471 info
->size
= info
->self_size
;
2472 info
->stack_frame_offset
= 0;
2473 info
->estimated_stack_size
= info
->estimated_self_stack_size
;
2475 /* Code above should compute exactly the same result as
2476 ipa_update_overall_fn_summary but because computation happens in
2477 different order the roundoff errors result in slight changes. */
2478 ipa_update_overall_fn_summary (node
);
2479 /* In LTO mode we may have speculative edges set. */
2480 gcc_assert (in_lto_p
|| info
->size
== info
->self_size
);
2484 /* Compute parameters of functions used by inliner using
2485 current_function_decl. */
2488 compute_fn_summary_for_current (void)
2490 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2494 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2495 KNOWN_CONTEXTS and KNOWN_AGGS. */
2498 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2499 int *size
, int *time
,
2500 vec
<tree
> known_vals
,
2501 vec
<ipa_polymorphic_call_context
> known_contexts
,
2502 vec
<ipa_agg_jump_function_p
> known_aggs
)
2505 struct cgraph_node
*callee
;
2506 class ipa_fn_summary
*isummary
;
2507 enum availability avail
;
2510 if (!known_vals
.exists () && !known_contexts
.exists ())
2512 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2515 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2516 known_aggs
, &speculative
);
2517 if (!target
|| speculative
)
2520 /* Account for difference in cost between indirect and direct calls. */
2521 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2522 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2523 gcc_checking_assert (*time
>= 0);
2524 gcc_checking_assert (*size
>= 0);
2526 callee
= cgraph_node::get (target
);
2527 if (!callee
|| !callee
->definition
)
2529 callee
= callee
->function_symbol (&avail
);
2530 if (avail
< AVAIL_AVAILABLE
)
2532 isummary
= ipa_fn_summaries
->get (callee
);
2533 if (isummary
== NULL
)
2536 return isummary
->inlinable
;
2539 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2540 handle edge E with probability PROB.
2541 Set HINTS if edge may be devirtualized.
2542 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2546 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2549 vec
<tree
> known_vals
,
2550 vec
<ipa_polymorphic_call_context
> known_contexts
,
2551 vec
<ipa_agg_jump_function_p
> known_aggs
,
2554 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2555 int call_size
= es
->call_stmt_size
;
2556 int call_time
= es
->call_stmt_time
;
2559 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2560 known_vals
, known_contexts
, known_aggs
)
2561 && hints
&& e
->maybe_hot_p ())
2562 *hints
|= INLINE_HINT_indirect_call
;
2563 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2566 *min_size
+= cur_size
;
2567 if (prob
== REG_BR_PROB_BASE
)
2568 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2570 *time
+= ((sreal
)call_time
* prob
) * e
->sreal_frequency ();
2575 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2576 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2577 describe context of the call site. */
2580 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2581 int *min_size
, sreal
*time
,
2583 clause_t possible_truths
,
2584 vec
<tree
> known_vals
,
2585 vec
<ipa_polymorphic_call_context
> known_contexts
,
2586 vec
<ipa_agg_jump_function_p
> known_aggs
)
2588 struct cgraph_edge
*e
;
2589 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2591 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2593 /* Do not care about zero sized builtins. */
2594 if (e
->inline_failed
&& !es
->call_stmt_size
)
2596 gcc_checking_assert (!es
->call_stmt_time
);
2600 || es
->predicate
->evaluate (possible_truths
))
2602 if (e
->inline_failed
)
2604 /* Predicates of calls shall not use NOT_CHANGED codes,
2605 sowe do not need to compute probabilities. */
2606 estimate_edge_size_and_time (e
, size
,
2607 es
->predicate
? NULL
: min_size
,
2608 time
, REG_BR_PROB_BASE
,
2609 known_vals
, known_contexts
,
2613 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2616 known_vals
, known_contexts
,
2620 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2622 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2624 || es
->predicate
->evaluate (possible_truths
))
2625 estimate_edge_size_and_time (e
, size
,
2626 es
->predicate
? NULL
: min_size
,
2627 time
, REG_BR_PROB_BASE
,
2628 known_vals
, known_contexts
, known_aggs
,
2634 /* Estimate size and time needed to execute NODE assuming
2635 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2636 information about NODE's arguments. If non-NULL use also probability
2637 information present in INLINE_PARAM_SUMMARY vector.
2638 Additionally detemine hints determined by the context. Finally compute
2639 minimal size needed for the call that is independent on the call context and
2640 can be used for fast estimates. Return the values in RET_SIZE,
2641 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2644 estimate_node_size_and_time (struct cgraph_node
*node
,
2645 clause_t possible_truths
,
2646 clause_t nonspec_possible_truths
,
2647 vec
<tree
> known_vals
,
2648 vec
<ipa_polymorphic_call_context
> known_contexts
,
2649 vec
<ipa_agg_jump_function_p
> known_aggs
,
2650 int *ret_size
, int *ret_min_size
,
2652 sreal
*ret_nonspecialized_time
,
2653 ipa_hints
*ret_hints
,
2654 vec
<inline_param_summary
>
2655 inline_param_summary
)
2657 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2662 ipa_hints hints
= 0;
2665 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2668 fprintf (dump_file
, " Estimating body: %s/%i\n"
2669 " Known to be false: ", node
->name (),
2672 for (i
= predicate::not_inlined_condition
;
2673 i
< (predicate::first_dynamic_condition
2674 + (int) vec_safe_length (info
->conds
)); i
++)
2675 if (!(possible_truths
& (1 << i
)))
2678 fprintf (dump_file
, ", ");
2680 dump_condition (dump_file
, info
->conds
, i
);
2684 estimate_calls_size_and_time (node
, &size
, &min_size
, &time
, &hints
, possible_truths
,
2685 known_vals
, known_contexts
, known_aggs
);
2686 sreal nonspecialized_time
= time
;
2688 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
2690 bool exec
= e
->exec_predicate
.evaluate (nonspec_possible_truths
);
2692 /* Because predicates are conservative, it can happen that nonconst is 1
2696 bool nonconst
= e
->nonconst_predicate
.evaluate (possible_truths
);
2698 gcc_checking_assert (e
->time
>= 0);
2699 gcc_checking_assert (time
>= 0);
2701 /* We compute specialized size only because size of nonspecialized
2702 copy is context independent.
2704 The difference between nonspecialized execution and specialized is
2705 that nonspecialized is not going to have optimized out computations
2706 known to be constant in a specialized setting. */
2709 nonspecialized_time
+= e
->time
;
2712 else if (!inline_param_summary
.exists ())
2719 int prob
= e
->nonconst_predicate
.probability
2720 (info
->conds
, possible_truths
,
2721 inline_param_summary
);
2722 gcc_checking_assert (prob
>= 0);
2723 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
2724 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
2726 gcc_checking_assert (time
>= 0);
2729 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
2730 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
2731 min_size
= (*info
->size_time_table
)[0].size
;
2732 gcc_checking_assert (size
>= 0);
2733 gcc_checking_assert (time
>= 0);
2734 /* nonspecialized_time should be always bigger than specialized time.
2735 Roundoff issues however may get into the way. */
2736 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
2738 /* Roundoff issues may make specialized time bigger than nonspecialized
2739 time. We do not really want that to happen because some heurstics
2740 may get confused by seeing negative speedups. */
2741 if (time
> nonspecialized_time
)
2742 time
= nonspecialized_time
;
2744 if (info
->loop_iterations
2745 && !info
->loop_iterations
->evaluate (possible_truths
))
2746 hints
|= INLINE_HINT_loop_iterations
;
2747 if (info
->loop_stride
2748 && !info
->loop_stride
->evaluate (possible_truths
))
2749 hints
|= INLINE_HINT_loop_stride
;
2751 hints
|= INLINE_HINT_in_scc
;
2752 if (DECL_DECLARED_INLINE_P (node
->decl
))
2753 hints
|= INLINE_HINT_declared_inline
;
2755 size
= RDIV (size
, ipa_fn_summary::size_scale
);
2756 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
2758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2759 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
2760 time
.to_double (), nonspecialized_time
.to_double ());
2763 if (ret_nonspecialized_time
)
2764 *ret_nonspecialized_time
= nonspecialized_time
;
2768 *ret_min_size
= min_size
;
2775 /* Estimate size and time needed to execute callee of EDGE assuming that
2776 parameters known to be constant at caller of EDGE are propagated.
2777 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2778 and types for parameters. */
2781 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
2782 vec
<tree
> known_vals
,
2783 vec
<ipa_polymorphic_call_context
>
2785 vec
<ipa_agg_jump_function_p
> known_aggs
,
2786 int *ret_size
, sreal
*ret_time
,
2787 sreal
*ret_nonspec_time
,
2790 clause_t clause
, nonspec_clause
;
2792 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
2793 &clause
, &nonspec_clause
);
2794 estimate_node_size_and_time (node
, clause
, nonspec_clause
,
2795 known_vals
, known_contexts
,
2796 known_aggs
, ret_size
, NULL
, ret_time
,
2797 ret_nonspec_time
, hints
, vNULL
);
2801 /* Update summary information of inline clones after inlining.
2802 Compute peak stack usage. */
2805 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
2807 struct cgraph_edge
*e
;
2808 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (node
);
2809 ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (node
->callers
->caller
);
2812 callee_info
->stack_frame_offset
2813 = caller_info
->stack_frame_offset
2814 + caller_info
->estimated_self_stack_size
;
2815 peak
= callee_info
->stack_frame_offset
2816 + callee_info
->estimated_self_stack_size
;
2818 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
->global
.inlined_to
);
2819 if (s
->estimated_stack_size
< peak
)
2820 s
->estimated_stack_size
= peak
;
2821 ipa_propagate_frequency (node
);
2822 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2824 if (!e
->inline_failed
)
2825 inline_update_callee_summaries (e
->callee
, depth
);
2826 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2828 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2829 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2832 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2833 When functoin A is inlined in B and A calls C with parameter that
2834 changes with probability PROB1 and C is known to be passthroug
2835 of argument if B that change with probability PROB2, the probability
2836 of change is now PROB1*PROB2. */
2839 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
2840 struct cgraph_edge
*edge
)
2842 if (ipa_node_params_sum
)
2845 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2846 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2847 class ipa_call_summary
*inlined_es
2848 = ipa_call_summaries
->get (inlined_edge
);
2850 if (es
->param
.length () == 0)
2853 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
2855 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
2856 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2857 || jfunc
->type
== IPA_JF_ANCESTOR
)
2859 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
2860 ? ipa_get_jf_pass_through_formal_id (jfunc
)
2861 : ipa_get_jf_ancestor_formal_id (jfunc
);
2862 if (id
< (int) inlined_es
->param
.length ())
2864 int prob1
= es
->param
[i
].change_prob
;
2865 int prob2
= inlined_es
->param
[id
].change_prob
;
2866 int prob
= combine_probabilities (prob1
, prob2
);
2868 if (prob1
&& prob2
&& !prob
)
2871 es
->param
[i
].change_prob
= prob
;
2878 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2880 Remap predicates of callees of NODE. Rest of arguments match
2883 Also update change probabilities. */
2886 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
2887 struct cgraph_node
*node
,
2888 class ipa_fn_summary
*info
,
2889 class ipa_fn_summary
*callee_info
,
2890 vec
<int> operand_map
,
2891 vec
<int> offset_map
,
2892 clause_t possible_truths
,
2893 predicate
*toplev_predicate
)
2895 struct cgraph_edge
*e
, *next
;
2896 for (e
= node
->callees
; e
; e
= next
)
2898 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2900 next
= e
->next_callee
;
2902 if (e
->inline_failed
)
2904 remap_edge_change_prob (inlined_edge
, e
);
2908 p
= es
->predicate
->remap_after_inlining
2909 (info
, callee_info
, operand_map
,
2910 offset_map
, possible_truths
,
2912 edge_set_predicate (e
, &p
);
2915 edge_set_predicate (e
, toplev_predicate
);
2918 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
2919 operand_map
, offset_map
, possible_truths
,
2922 for (e
= node
->indirect_calls
; e
; e
= next
)
2924 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2926 next
= e
->next_callee
;
2928 remap_edge_change_prob (inlined_edge
, e
);
2931 p
= es
->predicate
->remap_after_inlining
2932 (info
, callee_info
, operand_map
, offset_map
,
2933 possible_truths
, *toplev_predicate
);
2934 edge_set_predicate (e
, &p
);
2937 edge_set_predicate (e
, toplev_predicate
);
2941 /* Same as remap_predicate, but set result into hint *HINT. */
2944 remap_hint_predicate (class ipa_fn_summary
*info
,
2945 class ipa_fn_summary
*callee_info
,
2947 vec
<int> operand_map
,
2948 vec
<int> offset_map
,
2949 clause_t possible_truths
,
2950 predicate
*toplev_predicate
)
2956 p
= (*hint
)->remap_after_inlining
2958 operand_map
, offset_map
,
2959 possible_truths
, *toplev_predicate
);
2960 if (p
!= false && p
!= true)
2963 set_hint_predicate (hint
, p
);
2969 /* We inlined EDGE. Update summary of the function we inlined into. */
2972 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
2974 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
2975 struct cgraph_node
*to
= (edge
->caller
->global
.inlined_to
2976 ? edge
->caller
->global
.inlined_to
: edge
->caller
);
2977 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
2978 clause_t clause
= 0; /* not_inline is known to be false. */
2980 vec
<int> operand_map
= vNULL
;
2981 vec
<int> offset_map
= vNULL
;
2983 predicate toplev_predicate
;
2984 predicate true_p
= true;
2985 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2988 toplev_predicate
= *es
->predicate
;
2990 toplev_predicate
= true;
2992 info
->fp_expressions
|= callee_info
->fp_expressions
;
2994 if (callee_info
->conds
)
2995 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
2996 if (ipa_node_params_sum
&& callee_info
->conds
)
2998 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2999 int count
= ipa_get_cs_argument_count (args
);
3004 operand_map
.safe_grow_cleared (count
);
3005 offset_map
.safe_grow_cleared (count
);
3007 for (i
= 0; i
< count
; i
++)
3009 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3012 /* TODO: handle non-NOPs when merging. */
3013 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3015 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3016 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3017 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3020 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3022 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3023 if (offset
>= 0 && offset
< INT_MAX
)
3025 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3026 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3028 offset_map
[i
] = offset
;
3031 operand_map
[i
] = map
;
3032 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
3035 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3038 p
= e
->exec_predicate
.remap_after_inlining
3039 (info
, callee_info
, operand_map
,
3042 predicate nonconstp
;
3043 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3044 (info
, callee_info
, operand_map
,
3047 if (p
!= false && nonconstp
!= false)
3049 sreal add_time
= ((sreal
)e
->time
* edge
->sreal_frequency ());
3050 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3052 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3053 if (prob
!= REG_BR_PROB_BASE
3054 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3056 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3057 (double) prob
/ REG_BR_PROB_BASE
);
3059 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3062 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3063 offset_map
, clause
, &toplev_predicate
);
3064 remap_hint_predicate (info
, callee_info
,
3065 &callee_info
->loop_iterations
,
3066 operand_map
, offset_map
, clause
, &toplev_predicate
);
3067 remap_hint_predicate (info
, callee_info
,
3068 &callee_info
->loop_stride
,
3069 operand_map
, offset_map
, clause
, &toplev_predicate
);
3071 ipa_call_summary
*s
= ipa_call_summaries
->get (edge
);
3072 inline_update_callee_summaries (edge
->callee
, s
->loop_depth
);
3074 /* We do not maintain predicates of inlined edges, free it. */
3075 edge_set_predicate (edge
, &true_p
);
3076 /* Similarly remove param summaries. */
3077 es
->param
.release ();
3078 operand_map
.release ();
3079 offset_map
.release ();
3082 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3083 and time. Recompute it. */
3086 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3088 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3094 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3096 info
->size
+= e
->size
;
3097 info
->time
+= e
->time
;
3099 estimate_calls_size_and_time (node
, &info
->size
, &info
->min_size
,
3101 ~(clause_t
) (1 << predicate::false_condition
),
3102 vNULL
, vNULL
, vNULL
);
3103 info
->size
= (info
->size
+ ipa_fn_summary::size_scale
/ 2) / ipa_fn_summary::size_scale
;
3107 /* This function performs intraprocedural analysis in NODE that is required to
3108 inline indirect calls. */
3111 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3113 ipa_analyze_node (node
);
3114 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3116 ipa_print_node_params (dump_file
, node
);
3117 ipa_print_node_jump_functions (dump_file
, node
);
3122 /* Note function body size. */
3125 inline_analyze_function (struct cgraph_node
*node
)
3127 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3130 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3131 node
->name (), node
->order
);
3132 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3133 inline_indirect_intraprocedural_analysis (node
);
3134 compute_fn_summary (node
, false);
3137 struct cgraph_edge
*e
;
3138 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3139 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3140 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3141 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3148 /* Called when new function is inserted to callgraph late. */
3151 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3153 inline_analyze_function (node
);
3156 /* Note function body size. */
3159 ipa_fn_summary_generate (void)
3161 struct cgraph_node
*node
;
3163 FOR_EACH_DEFINED_FUNCTION (node
)
3164 if (DECL_STRUCT_FUNCTION (node
->decl
))
3165 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
3167 ipa_fn_summary_alloc ();
3169 ipa_fn_summaries
->enable_insertion_hook ();
3171 ipa_register_cgraph_hooks ();
3173 FOR_EACH_DEFINED_FUNCTION (node
)
3175 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3176 || opt_for_fn (node
->decl
, optimize
)))
3177 inline_analyze_function (node
);
3181 /* Write inline summary for edge E to OB. */
3184 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
3187 class ipa_call_summary
*es
= prevails
3188 ? ipa_call_summaries
->get_create (e
) : NULL
;
3192 int size
= streamer_read_uhwi (ib
);
3193 int time
= streamer_read_uhwi (ib
);
3194 int depth
= streamer_read_uhwi (ib
);
3198 es
->call_stmt_size
= size
;
3199 es
->call_stmt_time
= time
;
3200 es
->loop_depth
= depth
;
3203 bitpack_d bp
= streamer_read_bitpack (ib
);
3205 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3207 bp_unpack_value (&bp
, 1);
3211 edge_set_predicate (e
, &p
);
3212 length
= streamer_read_uhwi (ib
);
3213 if (length
&& es
&& e
->possibly_call_in_translation_unit_p ())
3215 es
->param
.safe_grow_cleared (length
);
3216 for (i
= 0; i
< length
; i
++)
3217 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3221 for (i
= 0; i
< length
; i
++)
3222 streamer_read_uhwi (ib
);
3227 /* Stream in inline summaries from the section. */
3230 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3233 const struct lto_function_header
*header
=
3234 (const struct lto_function_header
*) data
;
3235 const int cfg_offset
= sizeof (struct lto_function_header
);
3236 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3237 const int string_offset
= main_offset
+ header
->main_size
;
3238 class data_in
*data_in
;
3239 unsigned int i
, count2
, j
;
3240 unsigned int f_count
;
3242 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3243 file_data
->mode_table
);
3246 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3247 header
->string_size
, vNULL
);
3248 f_count
= streamer_read_uhwi (&ib
);
3249 for (i
= 0; i
< f_count
; i
++)
3252 struct cgraph_node
*node
;
3253 class ipa_fn_summary
*info
;
3254 lto_symtab_encoder_t encoder
;
3255 struct bitpack_d bp
;
3256 struct cgraph_edge
*e
;
3259 index
= streamer_read_uhwi (&ib
);
3260 encoder
= file_data
->symtab_node_encoder
;
3261 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3263 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
3265 int stack_size
= streamer_read_uhwi (&ib
);
3266 int size
= streamer_read_uhwi (&ib
);
3267 sreal time
= sreal::stream_in (&ib
);
3271 info
->estimated_stack_size
3272 = info
->estimated_self_stack_size
= stack_size
;
3273 info
->size
= info
->self_size
= size
;
3277 bp
= streamer_read_bitpack (&ib
);
3280 info
->inlinable
= bp_unpack_value (&bp
, 1);
3281 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3285 bp_unpack_value (&bp
, 1);
3286 bp_unpack_value (&bp
, 1);
3289 count2
= streamer_read_uhwi (&ib
);
3290 gcc_assert (!info
|| !info
->conds
);
3291 for (j
= 0; j
< count2
; j
++)
3294 c
.operand_num
= streamer_read_uhwi (&ib
);
3295 c
.size
= streamer_read_poly_uint64 (&ib
);
3296 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3297 c
.val
= stream_read_tree (&ib
, data_in
);
3298 bp
= streamer_read_bitpack (&ib
);
3299 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3300 c
.by_ref
= bp_unpack_value (&bp
, 1);
3302 c
.offset
= streamer_read_uhwi (&ib
);
3304 vec_safe_push (info
->conds
, c
);
3306 count2
= streamer_read_uhwi (&ib
);
3307 gcc_assert (!info
|| !info
->size_time_table
);
3308 for (j
= 0; j
< count2
; j
++)
3310 class size_time_entry e
;
3312 e
.size
= streamer_read_uhwi (&ib
);
3313 e
.time
= sreal::stream_in (&ib
);
3314 e
.exec_predicate
.stream_in (&ib
);
3315 e
.nonconst_predicate
.stream_in (&ib
);
3318 vec_safe_push (info
->size_time_table
, e
);
3323 set_hint_predicate (&info
->loop_iterations
, p
);
3326 set_hint_predicate (&info
->loop_stride
, p
);
3327 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3328 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3329 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3330 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3333 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3335 lto_data_in_delete (data_in
);
3339 /* Read inline summary. Jump functions are shared among ipa-cp
3340 and inliner, so when ipa-cp is active, we don't need to write them
3344 ipa_fn_summary_read (void)
3346 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3347 struct lto_file_decl_data
*file_data
;
3350 ipa_fn_summary_alloc ();
3352 while ((file_data
= file_data_vec
[j
++]))
3355 const char *data
= lto_get_section_data (file_data
,
3356 LTO_section_ipa_fn_summary
,
3359 inline_read_section (file_data
, data
, len
);
3361 /* Fatal error here. We do not want to support compiling ltrans units
3362 with different version of compiler or different flags than the WPA
3363 unit, so this should never happen. */
3364 fatal_error (input_location
,
3365 "ipa inline summary is missing in input file");
3367 ipa_register_cgraph_hooks ();
3369 ipa_prop_read_jump_functions ();
3371 gcc_assert (ipa_fn_summaries
);
3372 ipa_fn_summaries
->enable_insertion_hook ();
3376 /* Write inline summary for edge E to OB. */
3379 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3381 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3384 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3385 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3386 streamer_write_uhwi (ob
, es
->loop_depth
);
3388 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3389 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3390 streamer_write_bitpack (&bp
);
3393 es
->predicate
->stream_out (ob
);
3395 streamer_write_uhwi (ob
, 0);
3396 streamer_write_uhwi (ob
, es
->param
.length ());
3397 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3398 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3402 /* Write inline summary for node in SET.
3403 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3404 active, we don't need to write them twice. */
3407 ipa_fn_summary_write (void)
3409 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3410 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3411 unsigned int count
= 0;
3414 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3416 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3417 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3418 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3421 streamer_write_uhwi (ob
, count
);
3423 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3425 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3426 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3427 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3429 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3430 struct bitpack_d bp
;
3431 struct cgraph_edge
*edge
;
3434 struct condition
*c
;
3436 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3437 streamer_write_hwi (ob
, info
->estimated_self_stack_size
);
3438 streamer_write_hwi (ob
, info
->self_size
);
3439 info
->time
.stream_out (ob
);
3440 bp
= bitpack_create (ob
->main_stream
);
3441 bp_pack_value (&bp
, info
->inlinable
, 1);
3442 bp_pack_value (&bp
, false, 1);
3443 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3444 streamer_write_bitpack (&bp
);
3445 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3446 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3448 streamer_write_uhwi (ob
, c
->operand_num
);
3449 streamer_write_poly_uint64 (ob
, c
->size
);
3450 streamer_write_uhwi (ob
, c
->code
);
3451 stream_write_tree (ob
, c
->val
, true);
3452 bp
= bitpack_create (ob
->main_stream
);
3453 bp_pack_value (&bp
, c
->agg_contents
, 1);
3454 bp_pack_value (&bp
, c
->by_ref
, 1);
3455 streamer_write_bitpack (&bp
);
3456 if (c
->agg_contents
)
3457 streamer_write_uhwi (ob
, c
->offset
);
3459 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3460 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3462 streamer_write_uhwi (ob
, e
->size
);
3463 e
->time
.stream_out (ob
);
3464 e
->exec_predicate
.stream_out (ob
);
3465 e
->nonconst_predicate
.stream_out (ob
);
3467 if (info
->loop_iterations
)
3468 info
->loop_iterations
->stream_out (ob
);
3470 streamer_write_uhwi (ob
, 0);
3471 if (info
->loop_stride
)
3472 info
->loop_stride
->stream_out (ob
);
3474 streamer_write_uhwi (ob
, 0);
3475 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3476 write_ipa_call_summary (ob
, edge
);
3477 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3478 write_ipa_call_summary (ob
, edge
);
3481 streamer_write_char_stream (ob
->main_stream
, 0);
3482 produce_asm (ob
, NULL
);
3483 destroy_output_block (ob
);
3486 ipa_prop_write_jump_functions ();
3490 /* Release inline summary. */
3493 ipa_free_fn_summary (void)
3495 struct cgraph_node
*node
;
3496 if (!ipa_call_summaries
)
3498 FOR_EACH_DEFINED_FUNCTION (node
)
3500 ipa_fn_summaries
->remove (node
);
3501 ipa_fn_summaries
->release ();
3502 ipa_fn_summaries
= NULL
;
3503 ipa_call_summaries
->release ();
3504 delete ipa_call_summaries
;
3505 ipa_call_summaries
= NULL
;
3506 edge_predicate_pool
.release ();
3511 const pass_data pass_data_local_fn_summary
=
3513 GIMPLE_PASS
, /* type */
3514 "local-fnsummary", /* name */
3515 OPTGROUP_INLINE
, /* optinfo_flags */
3516 TV_INLINE_PARAMETERS
, /* tv_id */
3517 0, /* properties_required */
3518 0, /* properties_provided */
3519 0, /* properties_destroyed */
3520 0, /* todo_flags_start */
3521 0, /* todo_flags_finish */
3524 class pass_local_fn_summary
: public gimple_opt_pass
3527 pass_local_fn_summary (gcc::context
*ctxt
)
3528 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3531 /* opt_pass methods: */
3532 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3533 virtual unsigned int execute (function
*)
3535 return compute_fn_summary_for_current ();
3538 }; // class pass_local_fn_summary
3543 make_pass_local_fn_summary (gcc::context
*ctxt
)
3545 return new pass_local_fn_summary (ctxt
);
3549 /* Free inline summary. */
3553 const pass_data pass_data_ipa_free_fn_summary
=
3555 SIMPLE_IPA_PASS
, /* type */
3556 "free-fnsummary", /* name */
3557 OPTGROUP_NONE
, /* optinfo_flags */
3558 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3559 0, /* properties_required */
3560 0, /* properties_provided */
3561 0, /* properties_destroyed */
3562 0, /* todo_flags_start */
3563 0, /* todo_flags_finish */
3566 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3569 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3570 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
3574 /* opt_pass methods: */
3575 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
3576 void set_pass_param (unsigned int n
, bool param
)
3578 gcc_assert (n
== 0);
3581 virtual bool gate (function
*) { return small_p
|| !flag_wpa
; }
3582 virtual unsigned int execute (function
*)
3584 ipa_free_fn_summary ();
3590 }; // class pass_ipa_free_fn_summary
3594 simple_ipa_opt_pass
*
3595 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3597 return new pass_ipa_free_fn_summary (ctxt
);
3602 const pass_data pass_data_ipa_fn_summary
=
3604 IPA_PASS
, /* type */
3605 "fnsummary", /* name */
3606 OPTGROUP_INLINE
, /* optinfo_flags */
3607 TV_IPA_FNSUMMARY
, /* tv_id */
3608 0, /* properties_required */
3609 0, /* properties_provided */
3610 0, /* properties_destroyed */
3611 0, /* todo_flags_start */
3612 ( TODO_dump_symtab
), /* todo_flags_finish */
3615 class pass_ipa_fn_summary
: public ipa_opt_pass_d
3618 pass_ipa_fn_summary (gcc::context
*ctxt
)
3619 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
3620 ipa_fn_summary_generate
, /* generate_summary */
3621 ipa_fn_summary_write
, /* write_summary */
3622 ipa_fn_summary_read
, /* read_summary */
3623 NULL
, /* write_optimization_summary */
3624 NULL
, /* read_optimization_summary */
3625 NULL
, /* stmt_fixup */
3626 0, /* function_transform_todo_flags_start */
3627 NULL
, /* function_transform */
3628 NULL
) /* variable_transform */
3631 /* opt_pass methods: */
3632 virtual unsigned int execute (function
*) { return 0; }
3634 }; // class pass_ipa_fn_summary
3639 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
3641 return new pass_ipa_fn_summary (ctxt
);
3644 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3645 within the same process. For use by toplev::finalize. */
3648 ipa_fnsummary_c_finalize (void)
3650 ipa_free_fn_summary ();