1 /* Function summary pass.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
88 function_summary
<ipa_fn_summary
*> *ipa_fn_summaries
;
89 call_summary
<ipa_call_summary
*> *ipa_call_summaries
;
91 /* Edge predicates goes here. */
92 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
97 ipa_dump_hints (FILE *f
, ipa_hints hints
)
101 fprintf (f
, "IPA hints:");
102 if (hints
& INLINE_HINT_indirect_call
)
104 hints
&= ~INLINE_HINT_indirect_call
;
105 fprintf (f
, " indirect_call");
107 if (hints
& INLINE_HINT_loop_iterations
)
109 hints
&= ~INLINE_HINT_loop_iterations
;
110 fprintf (f
, " loop_iterations");
112 if (hints
& INLINE_HINT_loop_stride
)
114 hints
&= ~INLINE_HINT_loop_stride
;
115 fprintf (f
, " loop_stride");
117 if (hints
& INLINE_HINT_same_scc
)
119 hints
&= ~INLINE_HINT_same_scc
;
120 fprintf (f
, " same_scc");
122 if (hints
& INLINE_HINT_in_scc
)
124 hints
&= ~INLINE_HINT_in_scc
;
125 fprintf (f
, " in_scc");
127 if (hints
& INLINE_HINT_cross_module
)
129 hints
&= ~INLINE_HINT_cross_module
;
130 fprintf (f
, " cross_module");
132 if (hints
& INLINE_HINT_declared_inline
)
134 hints
&= ~INLINE_HINT_declared_inline
;
135 fprintf (f
, " declared_inline");
137 if (hints
& INLINE_HINT_array_index
)
139 hints
&= ~INLINE_HINT_array_index
;
140 fprintf (f
, " array_index");
142 if (hints
& INLINE_HINT_known_hot
)
144 hints
&= ~INLINE_HINT_known_hot
;
145 fprintf (f
, " known_hot");
151 /* Record SIZE and TIME to SUMMARY.
152 The accounted code will be executed when EXEC_PRED is true.
153 When NONCONST_PRED is false the code will evaulate to constant and
154 will get optimized out in specialized clones of the function. */
157 ipa_fn_summary::account_size_time (int size
, sreal time
,
158 const predicate
&exec_pred
,
159 const predicate
&nonconst_pred_in
)
164 predicate nonconst_pred
;
166 if (exec_pred
== false)
169 nonconst_pred
= nonconst_pred_in
& exec_pred
;
171 if (nonconst_pred
== false)
174 /* We need to create initial empty unconitional clause, but otherwie
175 we don't need to account empty times and sizes. */
176 if (!size
&& time
== 0 && size_time_table
)
179 gcc_assert (time
>= 0);
181 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
182 if (e
->exec_predicate
== exec_pred
183 && e
->nonconst_predicate
== nonconst_pred
)
192 e
= &(*size_time_table
)[0];
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
198 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size
) / ipa_fn_summary::size_scale
,
203 (time
.to_double ()), found
? "" : "new ");
204 exec_pred
.dump (dump_file
, conds
, 0);
205 if (exec_pred
!= nonconst_pred
)
207 fprintf (dump_file
, " nonconst:");
208 nonconst_pred
.dump (dump_file
, conds
);
211 fprintf (dump_file
, "\n");
215 struct size_time_entry new_entry
;
216 new_entry
.size
= size
;
217 new_entry
.time
= time
;
218 new_entry
.exec_predicate
= exec_pred
;
219 new_entry
.nonconst_predicate
= nonconst_pred
;
220 vec_safe_push (size_time_table
, new_entry
);
229 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
231 static struct cgraph_edge
*
232 redirect_to_unreachable (struct cgraph_edge
*e
)
234 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
235 struct cgraph_node
*target
= cgraph_node::get_create
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
239 e
= e
->resolve_speculation (target
->decl
);
241 e
->make_direct (target
);
243 e
->redirect_callee (target
);
244 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
245 e
->inline_failed
= CIF_UNREACHABLE
;
246 e
->count
= profile_count::zero ();
247 es
->call_stmt_size
= 0;
248 es
->call_stmt_time
= 0;
250 callee
->remove_symbol_and_inline_clones ();
254 /* Set predicate for edge E. */
257 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
262 if (predicate
&& *predicate
== false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e
->speculative
|| e
->callee
))
267 e
= redirect_to_unreachable (e
);
269 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
270 if (predicate
&& *predicate
!= true)
273 es
->predicate
= edge_predicate_pool
.allocate ();
274 *es
->predicate
= *predicate
;
279 edge_predicate_pool
.remove (es
->predicate
);
280 es
->predicate
= NULL
;
284 /* Set predicate for hint *P. */
287 set_hint_predicate (predicate
**p
, predicate new_predicate
)
289 if (new_predicate
== false || new_predicate
== true)
292 edge_predicate_pool
.remove (*p
);
298 *p
= edge_predicate_pool
.allocate ();
304 /* Compute what conditions may or may not hold given invormation about
305 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
306 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
307 copy when called in a given context. It is a bitmask of conditions. Bit
308 0 means that condition is known to be false, while bit 1 means that condition
309 may or may not be true. These differs - for example NOT_INLINED condition
310 is always false in the second and also builtin_constant_p tests can not use
311 the fact that parameter is indeed a constant.
313 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
314 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
315 Return clause of possible truths. When INLINE_P is true, assume that we are
318 ERROR_MARK means compile time invariant. */
321 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
323 vec
<tree
> known_vals
,
324 vec
<ipa_agg_jump_function_p
>
326 clause_t
*ret_clause
,
327 clause_t
*ret_nonspec_clause
)
329 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
330 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
331 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
335 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
340 /* We allow call stmt to have fewer arguments than the callee function
341 (especially for K&R style programs). So bound check here (we assume
342 known_aggs vector, if non-NULL, has the same length as
344 gcc_checking_assert (!known_aggs
.exists ()
345 || (known_vals
.length () == known_aggs
.length ()));
346 if (c
->operand_num
>= (int) known_vals
.length ())
348 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
349 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
355 struct ipa_agg_jump_function
*agg
;
357 if (c
->code
== predicate::changed
359 && (known_vals
[c
->operand_num
] == error_mark_node
))
362 if (known_aggs
.exists ())
364 agg
= known_aggs
[c
->operand_num
];
365 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
366 c
->offset
, c
->by_ref
);
373 val
= known_vals
[c
->operand_num
];
374 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
380 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
381 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
384 if (c
->code
== predicate::changed
)
386 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val
))) != c
->size
)
392 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
393 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
396 if (c
->code
== predicate::is_not_constant
)
398 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
402 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (c
->val
), val
);
404 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
407 if (res
&& integer_zerop (res
))
410 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
411 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
413 *ret_clause
= clause
;
414 if (ret_nonspec_clause
)
415 *ret_nonspec_clause
= nonspec_clause
;
419 /* Work out what conditions might be true at invocation of E. */
422 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
423 clause_t
*clause_ptr
,
424 clause_t
*nonspec_clause_ptr
,
425 vec
<tree
> *known_vals_ptr
,
426 vec
<ipa_polymorphic_call_context
>
428 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
430 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
431 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
432 vec
<tree
> known_vals
= vNULL
;
433 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
436 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
438 known_vals_ptr
->create (0);
439 if (known_contexts_ptr
)
440 known_contexts_ptr
->create (0);
442 if (ipa_node_params_sum
443 && !e
->call_stmt_cannot_inline_p
444 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
))
446 struct ipa_node_params
*caller_parms_info
, *callee_pi
;
447 struct ipa_edge_args
*args
= IPA_EDGE_REF (e
);
448 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
449 int i
, count
= ipa_get_cs_argument_count (args
);
451 if (e
->caller
->global
.inlined_to
)
452 caller_parms_info
= IPA_NODE_REF (e
->caller
->global
.inlined_to
);
454 caller_parms_info
= IPA_NODE_REF (e
->caller
);
455 callee_pi
= IPA_NODE_REF (e
->callee
);
457 if (count
&& (info
->conds
|| known_vals_ptr
))
458 known_vals
.safe_grow_cleared (count
);
459 if (count
&& (info
->conds
|| known_aggs_ptr
))
460 known_aggs
.safe_grow_cleared (count
);
461 if (count
&& known_contexts_ptr
)
462 known_contexts_ptr
->safe_grow_cleared (count
);
464 for (i
= 0; i
< count
; i
++)
466 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
467 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
468 ipa_get_type (callee_pi
, i
));
470 if (!cst
&& e
->call_stmt
471 && i
< (int)gimple_call_num_args (e
->call_stmt
))
473 cst
= gimple_call_arg (e
->call_stmt
, i
);
474 if (!is_gimple_min_invariant (cst
))
479 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
480 if (known_vals
.exists ())
483 else if (inline_p
&& !es
->param
[i
].change_prob
)
484 known_vals
[i
] = error_mark_node
;
486 if (known_contexts_ptr
)
487 (*known_contexts_ptr
)[i
]
488 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs
[i
] = &jf
->agg
;
495 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
496 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
498 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
500 if (count
&& (info
->conds
|| known_vals_ptr
))
501 known_vals
.safe_grow_cleared (count
);
502 for (i
= 0; i
< count
; i
++)
504 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
505 if (!is_gimple_min_invariant (cst
))
512 evaluate_conditions_for_known_args (callee
, inline_p
,
513 known_vals
, known_aggs
, clause_ptr
,
517 *known_vals_ptr
= known_vals
;
519 known_vals
.release ();
522 *known_aggs_ptr
= known_aggs
;
524 known_aggs
.release ();
528 /* Allocate the function summary. */
531 ipa_fn_summary_alloc (void)
533 gcc_checking_assert (!ipa_fn_summaries
);
534 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
535 ipa_call_summaries
= new ipa_call_summary_t (symtab
, false);
538 /* We are called multiple time for given function; clear
539 data from previous run so they are not cumulated. */
542 ipa_call_summary::reset ()
544 call_stmt_size
= call_stmt_time
= 0;
545 is_return_callee_uncaptured
= false;
547 edge_predicate_pool
.remove (predicate
);
552 /* We are called multiple time for given function; clear
553 data from previous run so they are not cumulated. */
556 ipa_fn_summary::reset (struct cgraph_node
*node
)
558 struct cgraph_edge
*e
;
561 estimated_stack_size
= 0;
562 estimated_self_stack_size
= 0;
563 stack_frame_offset
= 0;
570 edge_predicate_pool
.remove (loop_iterations
);
571 loop_iterations
= NULL
;
575 edge_predicate_pool
.remove (loop_stride
);
580 edge_predicate_pool
.remove (array_index
);
584 vec_free (size_time_table
);
585 for (e
= node
->callees
; e
; e
= e
->next_callee
)
586 ipa_call_summaries
->get (e
)->reset ();
587 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
588 ipa_call_summaries
->get (e
)->reset ();
589 fp_expressions
= false;
592 /* Hook that is called by cgraph.c when a node is removed. */
595 ipa_fn_summary_t::remove (cgraph_node
*node
, ipa_fn_summary
*info
)
600 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
601 Additionally care about allocating new memory slot for updated predicate
602 and set it to NULL when it becomes true or false (and thus uninteresting).
606 remap_hint_predicate_after_duplication (predicate
**p
,
607 clause_t possible_truths
)
609 predicate new_predicate
;
614 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
615 /* We do not want to free previous predicate; it is used by node origin. */
617 set_hint_predicate (p
, new_predicate
);
621 /* Hook that is called by cgraph.c when a node is duplicated. */
623 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
626 ipa_fn_summary
*info
)
628 memcpy (info
, ipa_fn_summaries
->get (src
), sizeof (ipa_fn_summary
));
629 /* TODO: as an optimization, we may avoid copying conditions
630 that are known to be false or true. */
631 info
->conds
= vec_safe_copy (info
->conds
);
633 /* When there are any replacements in the function body, see if we can figure
634 out that something was optimized out. */
635 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
637 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
638 /* Use SRC parm info since it may not be copied yet. */
639 struct ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
640 vec
<tree
> known_vals
= vNULL
;
641 int count
= ipa_get_param_count (parms_info
);
643 clause_t possible_truths
;
644 predicate true_pred
= true;
646 int optimized_out_size
= 0;
647 bool inlined_to_p
= false;
648 struct cgraph_edge
*edge
, *next
;
650 info
->size_time_table
= 0;
651 known_vals
.safe_grow_cleared (count
);
652 for (i
= 0; i
< count
; i
++)
654 struct ipa_replace_map
*r
;
656 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
658 if (((!r
->old_tree
&& r
->parm_num
== i
)
659 || (r
->old_tree
&& r
->old_tree
== ipa_get_param (parms_info
, i
)))
660 && r
->replace_p
&& !r
->ref_p
)
662 known_vals
[i
] = r
->new_tree
;
667 evaluate_conditions_for_known_args (dst
, false,
671 /* We are going to specialize,
672 so ignore nonspec truths. */
674 known_vals
.release ();
676 info
->account_size_time (0, 0, true_pred
, true_pred
);
678 /* Remap size_time vectors.
679 Simplify the predicate by prunning out alternatives that are known
681 TODO: as on optimization, we can also eliminate conditions known
683 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
685 predicate new_exec_pred
;
686 predicate new_nonconst_pred
;
687 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
689 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
691 if (new_exec_pred
== false || new_nonconst_pred
== false)
692 optimized_out_size
+= e
->size
;
694 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
698 /* Remap edge predicates with the same simplification as above.
699 Also copy constantness arrays. */
700 for (edge
= dst
->callees
; edge
; edge
= next
)
702 predicate new_predicate
;
703 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
704 next
= edge
->next_callee
;
706 if (!edge
->inline_failed
)
710 new_predicate
= es
->predicate
->remap_after_duplication
712 if (new_predicate
== false && *es
->predicate
!= false)
713 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
714 edge_set_predicate (edge
, &new_predicate
);
717 /* Remap indirect edge predicates with the same simplificaiton as above.
718 Also copy constantness arrays. */
719 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
721 predicate new_predicate
;
722 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
723 next
= edge
->next_callee
;
725 gcc_checking_assert (edge
->inline_failed
);
728 new_predicate
= es
->predicate
->remap_after_duplication
730 if (new_predicate
== false && *es
->predicate
!= false)
731 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
732 edge_set_predicate (edge
, &new_predicate
);
734 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
736 remap_hint_predicate_after_duplication (&info
->loop_stride
,
738 remap_hint_predicate_after_duplication (&info
->array_index
,
741 /* If inliner or someone after inliner will ever start producing
742 non-trivial clones, we will get trouble with lack of information
743 about updating self sizes, because size vectors already contains
744 sizes of the calees. */
745 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
749 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
750 if (info
->loop_iterations
)
752 predicate p
= *info
->loop_iterations
;
753 info
->loop_iterations
= NULL
;
754 set_hint_predicate (&info
->loop_iterations
, p
);
756 if (info
->loop_stride
)
758 predicate p
= *info
->loop_stride
;
759 info
->loop_stride
= NULL
;
760 set_hint_predicate (&info
->loop_stride
, p
);
762 if (info
->array_index
)
764 predicate p
= *info
->array_index
;
765 info
->array_index
= NULL
;
766 set_hint_predicate (&info
->array_index
, p
);
769 if (!dst
->global
.inlined_to
)
770 ipa_update_overall_fn_summary (dst
);
774 /* Hook that is called by cgraph.c when a node is duplicated. */
777 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
778 struct cgraph_edge
*dst
,
779 struct ipa_call_summary
*srcinfo
,
780 struct ipa_call_summary
*info
)
783 info
->predicate
= NULL
;
784 edge_set_predicate (dst
, srcinfo
->predicate
);
785 info
->param
= srcinfo
->param
.copy ();
786 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
788 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
789 - eni_size_weights
.call_cost
);
790 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
791 - eni_time_weights
.call_cost
);
796 /* Keep edge cache consistent across edge removal. */
799 ipa_call_summary_t::remove (struct cgraph_edge
*,
800 struct ipa_call_summary
*sum
)
806 /* Dump edge summaries associated to NODE and recursively to all clones.
810 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
811 struct ipa_fn_summary
*info
)
813 struct cgraph_edge
*edge
;
814 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
816 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
817 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
821 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i"
822 " time: %2i callee size:%2i stack:%2i",
823 indent
, "", callee
->name (), callee
->order
,
825 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
826 indent
, "", es
->loop_depth
, edge
->sreal_frequency ().to_double (),
827 es
->call_stmt_size
, es
->call_stmt_time
,
828 (int) ipa_fn_summaries
->get (callee
)->size
/ ipa_fn_summary::size_scale
,
829 (int) ipa_fn_summaries
->get (callee
)->estimated_stack_size
);
833 fprintf (f
, " predicate: ");
834 es
->predicate
->dump (f
, info
->conds
);
838 if (es
->param
.exists ())
839 for (i
= 0; i
< (int) es
->param
.length (); i
++)
841 int prob
= es
->param
[i
].change_prob
;
844 fprintf (f
, "%*s op%i is compile time invariant\n",
846 else if (prob
!= REG_BR_PROB_BASE
)
847 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
848 prob
* 100.0 / REG_BR_PROB_BASE
);
850 if (!edge
->inline_failed
)
852 fprintf (f
, "%*sStack frame offset %i, callee self size %i,"
855 (int) ipa_fn_summaries
->get (callee
)->stack_frame_offset
,
856 (int) ipa_fn_summaries
->get (callee
)->estimated_self_stack_size
,
857 (int) ipa_fn_summaries
->get (callee
)->estimated_stack_size
);
858 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
861 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
863 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
864 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
868 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
872 fprintf (f
, "predicate: ");
873 es
->predicate
->dump (f
, info
->conds
);
882 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
884 if (node
->definition
)
886 struct ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
889 fprintf (f
, "IPA function summary for %s/%i", node
->name (),
891 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
892 fprintf (f
, " always_inline");
894 fprintf (f
, " inlinable");
895 if (s
->fp_expressions
)
896 fprintf (f
, " fp_expression");
897 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
898 fprintf (f
, " self size: %i\n", s
->self_size
);
899 fprintf (f
, " global size: %i\n", s
->size
);
900 fprintf (f
, " min size: %i\n", s
->min_size
);
901 fprintf (f
, " self stack: %i\n",
902 (int) s
->estimated_self_stack_size
);
903 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
905 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
907 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
908 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
910 fprintf (f
, " size:%f, time:%f",
911 (double) e
->size
/ ipa_fn_summary::size_scale
,
912 e
->time
.to_double ());
913 if (e
->exec_predicate
!= true)
915 fprintf (f
, ", executed if:");
916 e
->exec_predicate
.dump (f
, s
->conds
, 0);
918 if (e
->exec_predicate
!= e
->nonconst_predicate
)
920 fprintf (f
, ", nonconst if:");
921 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
925 if (s
->loop_iterations
)
927 fprintf (f
, " loop iterations:");
928 s
->loop_iterations
->dump (f
, s
->conds
);
932 fprintf (f
, " loop stride:");
933 s
->loop_stride
->dump (f
, s
->conds
);
937 fprintf (f
, " array index:");
938 s
->array_index
->dump (f
, s
->conds
);
940 fprintf (f
, " calls:\n");
941 dump_ipa_call_summary (f
, 4, node
, s
);
947 ipa_debug_fn_summary (struct cgraph_node
*node
)
949 ipa_dump_fn_summary (stderr
, node
);
953 ipa_dump_fn_summaries (FILE *f
)
955 struct cgraph_node
*node
;
957 FOR_EACH_DEFINED_FUNCTION (node
)
958 if (!node
->global
.inlined_to
)
959 ipa_dump_fn_summary (f
, node
);
962 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
963 boolean variable pointed to by DATA. */
966 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
969 bool *b
= (bool *) data
;
974 /* If OP refers to value of function parameter, return the corresponding
975 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
976 PARM_DECL) will be stored to *SIZE_P in that case too. */
979 unmodified_parm_1 (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
981 /* SSA_NAME referring to parm default def? */
982 if (TREE_CODE (op
) == SSA_NAME
983 && SSA_NAME_IS_DEFAULT_DEF (op
)
984 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
987 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
988 return SSA_NAME_VAR (op
);
990 /* Non-SSA parm reference? */
991 if (TREE_CODE (op
) == PARM_DECL
)
993 bool modified
= false;
996 ao_ref_init (&refd
, op
);
997 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), mark_modified
, &modified
,
1002 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
1009 /* If OP refers to value of function parameter, return the corresponding
1010 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1011 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1012 stored to *SIZE_P in that case too. */
1015 unmodified_parm (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
1017 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
1021 if (TREE_CODE (op
) == SSA_NAME
1022 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1023 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1024 return unmodified_parm (SSA_NAME_DEF_STMT (op
),
1025 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1030 /* If OP refers to a value of a function parameter or value loaded from an
1031 aggregate passed to a parameter (either by value or reference), return TRUE
1032 and store the number of the parameter to *INDEX_P, the access size into
1033 *SIZE_P, and information whether and how it has been loaded from an
1034 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1035 statement in which OP is used or loaded. */
1038 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1039 gimple
*stmt
, tree op
, int *index_p
,
1040 HOST_WIDE_INT
*size_p
,
1041 struct agg_position_info
*aggpos
)
1043 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
1045 gcc_checking_assert (aggpos
);
1048 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1051 aggpos
->agg_contents
= false;
1052 aggpos
->by_ref
= false;
1056 if (TREE_CODE (op
) == SSA_NAME
)
1058 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1059 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1061 stmt
= SSA_NAME_DEF_STMT (op
);
1062 op
= gimple_assign_rhs1 (stmt
);
1063 if (!REFERENCE_CLASS_P (op
))
1064 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1068 aggpos
->agg_contents
= true;
1069 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1070 stmt
, op
, index_p
, &aggpos
->offset
,
1071 size_p
, &aggpos
->by_ref
);
1074 /* See if statement might disappear after inlining.
1075 0 - means not eliminated
1076 1 - half of statements goes away
1077 2 - for sure it is eliminated.
1078 We are not terribly sophisticated, basically looking for simple abstraction
1079 penalty wrappers. */
1082 eliminated_by_inlining_prob (gimple
*stmt
)
1084 enum gimple_code code
= gimple_code (stmt
);
1085 enum tree_code rhs_code
;
1095 if (gimple_num_ops (stmt
) != 2)
1098 rhs_code
= gimple_assign_rhs_code (stmt
);
1100 /* Casts of parameters, loads from parameters passed by reference
1101 and stores to return value or parameters are often free after
1102 inlining dua to SRA and further combining.
1103 Assume that half of statements goes away. */
1104 if (CONVERT_EXPR_CODE_P (rhs_code
)
1105 || rhs_code
== VIEW_CONVERT_EXPR
1106 || rhs_code
== ADDR_EXPR
1107 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1109 tree rhs
= gimple_assign_rhs1 (stmt
);
1110 tree lhs
= gimple_assign_lhs (stmt
);
1111 tree inner_rhs
= get_base_address (rhs
);
1112 tree inner_lhs
= get_base_address (lhs
);
1113 bool rhs_free
= false;
1114 bool lhs_free
= false;
1121 /* Reads of parameter are expected to be free. */
1122 if (unmodified_parm (stmt
, inner_rhs
, NULL
))
1124 /* Match expressions of form &this->field. Those will most likely
1125 combine with something upstream after inlining. */
1126 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1128 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1129 if (TREE_CODE (op
) == PARM_DECL
)
1131 else if (TREE_CODE (op
) == MEM_REF
1132 && unmodified_parm (stmt
, TREE_OPERAND (op
, 0), NULL
))
1136 /* When parameter is not SSA register because its address is taken
1137 and it is just copied into one, the statement will be completely
1138 free after inlining (we will copy propagate backward). */
1139 if (rhs_free
&& is_gimple_reg (lhs
))
1142 /* Reads of parameters passed by reference
1143 expected to be free (i.e. optimized out after inlining). */
1144 if (TREE_CODE (inner_rhs
) == MEM_REF
1145 && unmodified_parm (stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1148 /* Copying parameter passed by reference into gimple register is
1149 probably also going to copy propagate, but we can't be quite
1151 if (rhs_free
&& is_gimple_reg (lhs
))
1154 /* Writes to parameters, parameters passed by value and return value
1155 (either dirrectly or passed via invisible reference) are free.
1157 TODO: We ought to handle testcase like
1158 struct a {int a,b;};
1160 retrurnsturct (void)
1166 This translate into:
1181 For that we either need to copy ipa-split logic detecting writes
1183 if (TREE_CODE (inner_lhs
) == PARM_DECL
1184 || TREE_CODE (inner_lhs
) == RESULT_DECL
1185 || (TREE_CODE (inner_lhs
) == MEM_REF
1186 && (unmodified_parm (stmt
, TREE_OPERAND (inner_lhs
, 0), NULL
)
1187 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1188 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1189 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1191 0))) == RESULT_DECL
))))
1194 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1196 if (lhs_free
&& rhs_free
)
1206 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1207 predicates to the CFG edges. */
1210 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1211 struct ipa_fn_summary
*summary
,
1218 struct agg_position_info aggpos
;
1219 enum tree_code code
, inverted_code
;
1225 last
= last_stmt (bb
);
1226 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1228 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1230 op
= gimple_cond_lhs (last
);
1231 /* TODO: handle conditionals like
1234 if (unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1236 code
= gimple_cond_code (last
);
1237 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1239 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1241 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1242 ? code
: inverted_code
);
1243 /* invert_tree_comparison will return ERROR_MARK on FP
1244 comparsions that are not EQ/NE instead of returning proper
1245 unordered one. Be sure it is not confused with NON_CONSTANT. */
1246 if (this_code
!= ERROR_MARK
)
1249 = add_condition (summary
, index
, size
, &aggpos
, this_code
,
1250 unshare_expr_without_location
1251 (gimple_cond_rhs (last
)));
1252 e
->aux
= edge_predicate_pool
.allocate ();
1253 *(predicate
*) e
->aux
= p
;
1258 if (TREE_CODE (op
) != SSA_NAME
)
1261 if (builtin_constant_p (op))
1265 Here we can predicate nonconstant_code. We can't
1266 really handle constant_code since we have no predicate
1267 for this and also the constant code is not known to be
1268 optimized away when inliner doen't see operand is constant.
1269 Other optimizers might think otherwise. */
1270 if (gimple_cond_code (last
) != NE_EXPR
1271 || !integer_zerop (gimple_cond_rhs (last
)))
1273 set_stmt
= SSA_NAME_DEF_STMT (op
);
1274 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1275 || gimple_call_num_args (set_stmt
) != 1)
1277 op2
= gimple_call_arg (set_stmt
, 0);
1278 if (!unmodified_parm_or_parm_agg_item (fbi
, set_stmt
, op2
, &index
, &size
,
1281 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1283 predicate p
= add_condition (summary
, index
, size
, &aggpos
,
1284 predicate::is_not_constant
, NULL_TREE
);
1285 e
->aux
= edge_predicate_pool
.allocate ();
1286 *(predicate
*) e
->aux
= p
;
1291 /* If BB ends by a switch we can turn into predicates, attach corresponding
1292 predicates to the CFG edges. */
1295 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1296 struct ipa_fn_summary
*summary
,
1303 struct agg_position_info aggpos
;
1309 lastg
= last_stmt (bb
);
1310 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1312 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1313 op
= gimple_switch_index (last
);
1314 if (!unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1317 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1319 e
->aux
= edge_predicate_pool
.allocate ();
1320 *(predicate
*) e
->aux
= false;
1322 n
= gimple_switch_num_labels (last
);
1323 for (case_idx
= 0; case_idx
< n
; ++case_idx
)
1325 tree cl
= gimple_switch_label (last
, case_idx
);
1329 e
= find_edge (bb
, label_to_block (CASE_LABEL (cl
)));
1330 min
= CASE_LOW (cl
);
1331 max
= CASE_HIGH (cl
);
1333 /* For default we might want to construct predicate that none
1334 of cases is met, but it is bit hard to do not having negations
1335 of conditionals handy. */
1339 p
= add_condition (summary
, index
, size
, &aggpos
, EQ_EXPR
,
1340 unshare_expr_without_location (min
));
1344 p1
= add_condition (summary
, index
, size
, &aggpos
, GE_EXPR
,
1345 unshare_expr_without_location (min
));
1346 p2
= add_condition (summary
, index
, size
, &aggpos
, LE_EXPR
,
1347 unshare_expr_without_location (max
));
1350 *(struct predicate
*) e
->aux
1351 = p
.or_with (summary
->conds
, *(struct predicate
*) e
->aux
);
1356 /* For each BB in NODE attach to its AUX pointer predicate under
1357 which it is executable. */
1360 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1361 struct cgraph_node
*node
,
1362 struct ipa_fn_summary
*summary
)
1364 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1368 FOR_EACH_BB_FN (bb
, my_function
)
1370 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1371 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1374 /* Entry block is always executable. */
1375 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1376 = edge_predicate_pool
.allocate ();
1377 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1379 /* A simple dataflow propagation of predicates forward in the CFG.
1380 TODO: work in reverse postorder. */
1384 FOR_EACH_BB_FN (bb
, my_function
)
1386 predicate p
= false;
1389 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1393 predicate this_bb_predicate
1394 = *(predicate
*) e
->src
->aux
;
1396 this_bb_predicate
&= (*(struct predicate
*) e
->aux
);
1397 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1403 gcc_checking_assert (!bb
->aux
);
1409 bb
->aux
= edge_predicate_pool
.allocate ();
1410 *((predicate
*) bb
->aux
) = p
;
1412 else if (p
!= *(predicate
*) bb
->aux
)
1414 /* This OR operation is needed to ensure monotonous data flow
1415 in the case we hit the limit on number of clauses and the
1416 and/or operations above give approximate answers. */
1417 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1418 if (p
!= *(predicate
*) bb
->aux
)
1421 *((predicate
*) bb
->aux
) = p
;
1430 /* Return predicate specifying when the STMT might have result that is not
1431 a compile time constant. */
1434 will_be_nonconstant_expr_predicate (struct ipa_node_params
*info
,
1435 struct ipa_fn_summary
*summary
,
1437 vec
<predicate
> nonconstant_names
)
1443 while (UNARY_CLASS_P (expr
))
1444 expr
= TREE_OPERAND (expr
, 0);
1446 parm
= unmodified_parm (NULL
, expr
, &size
);
1447 if (parm
&& (index
= ipa_get_param_decl_index (info
, parm
)) >= 0)
1448 return add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1450 if (is_gimple_min_invariant (expr
))
1452 if (TREE_CODE (expr
) == SSA_NAME
)
1453 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1454 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1456 predicate p1
= will_be_nonconstant_expr_predicate
1457 (info
, summary
, TREE_OPERAND (expr
, 0),
1463 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1464 TREE_OPERAND (expr
, 1),
1466 return p1
.or_with (summary
->conds
, p2
);
1468 else if (TREE_CODE (expr
) == COND_EXPR
)
1470 predicate p1
= will_be_nonconstant_expr_predicate
1471 (info
, summary
, TREE_OPERAND (expr
, 0),
1477 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1478 TREE_OPERAND (expr
, 1),
1482 p1
= p1
.or_with (summary
->conds
, p2
);
1483 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1484 TREE_OPERAND (expr
, 2),
1486 return p2
.or_with (summary
->conds
, p1
);
1497 /* Return predicate specifying when the STMT might have result that is not
1498 a compile time constant. */
1501 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1502 struct ipa_fn_summary
*summary
,
1504 vec
<predicate
> nonconstant_names
)
1509 predicate op_non_const
;
1513 struct agg_position_info aggpos
;
1515 /* What statments might be optimized away
1516 when their arguments are constant. */
1517 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1518 && gimple_code (stmt
) != GIMPLE_COND
1519 && gimple_code (stmt
) != GIMPLE_SWITCH
1520 && (gimple_code (stmt
) != GIMPLE_CALL
1521 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1524 /* Stores will stay anyway. */
1525 if (gimple_store_p (stmt
))
1528 is_load
= gimple_assign_load_p (stmt
);
1530 /* Loads can be optimized when the value is known. */
1534 gcc_assert (gimple_assign_single_p (stmt
));
1535 op
= gimple_assign_rhs1 (stmt
);
1536 if (!unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, &base_index
, &size
,
1543 /* See if we understand all operands before we start
1544 adding conditionals. */
1545 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1547 tree parm
= unmodified_parm (stmt
, use
, NULL
);
1548 /* For arguments we can build a condition. */
1549 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1551 if (TREE_CODE (use
) != SSA_NAME
)
1553 /* If we know when operand is constant,
1554 we still can say something useful. */
1555 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1562 add_condition (summary
, base_index
, size
, &aggpos
, predicate::changed
,
1565 op_non_const
= false;
1566 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1569 tree parm
= unmodified_parm (stmt
, use
, &size
);
1572 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1574 if (index
!= base_index
)
1575 p
= add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1581 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1582 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1584 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1585 && gimple_op (stmt
, 0)
1586 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1587 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1589 return op_non_const
;
1592 struct record_modified_bb_info
1599 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1600 probability how often it changes between USE_BB.
1601 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1602 is in different loop nest, we can do better.
1603 This is all just estimate. In theory we look for minimal cut separating
1604 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1608 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1610 struct loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1611 if (l
&& l
->header
->count
< init_bb
->count
)
1616 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1617 set except for info->stmt. */
1620 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1622 struct record_modified_bb_info
*info
=
1623 (struct record_modified_bb_info
*) data
;
1624 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1626 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
1628 bitmap_set_bit (info
->bb_set
,
1629 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1630 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1632 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1633 gimple_bb (info
->stmt
))->index
);
1636 fprintf (dump_file
, " Param ");
1637 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1638 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1639 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1641 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1642 gimple_bb (info
->stmt
))->index
);
1643 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
1648 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1649 will change since last invocation of STMT.
1651 Value 0 is reserved for compile time invariants.
1652 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1653 ought to be REG_BR_PROB_BASE / estimated_iters. */
1656 param_change_prob (gimple
*stmt
, int i
)
1658 tree op
= gimple_call_arg (stmt
, i
);
1659 basic_block bb
= gimple_bb (stmt
);
1661 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1662 op
= TREE_OPERAND (op
, 0);
1664 tree base
= get_base_address (op
);
1666 /* Global invariants never change. */
1667 if (is_gimple_min_invariant (base
))
1670 /* We would have to do non-trivial analysis to really work out what
1671 is the probability of value to change (i.e. when init statement
1672 is in a sibling loop of the call).
1674 We do an conservative estimate: when call is executed N times more often
1675 than the statement defining value, we take the frequency 1/N. */
1676 if (TREE_CODE (base
) == SSA_NAME
)
1678 profile_count init_count
;
1680 if (!bb
->count
.nonzero_p ())
1681 return REG_BR_PROB_BASE
;
1683 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1684 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1686 init_count
= get_minimal_bb
1687 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1688 gimple_bb (stmt
))->count
;
1690 if (init_count
< bb
->count
)
1691 return MAX ((init_count
.to_sreal_scale (bb
->count
)
1692 * REG_BR_PROB_BASE
).to_int (), 1);
1693 return REG_BR_PROB_BASE
;
1698 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1699 struct record_modified_bb_info info
;
1700 tree init
= ctor_for_folding (base
);
1702 if (init
!= error_mark_node
)
1704 if (!bb
->count
.nonzero_p ())
1705 return REG_BR_PROB_BASE
;
1708 fprintf (dump_file
, " Analyzing param change probablity of ");
1709 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1710 fprintf (dump_file
, "\n");
1712 ao_ref_init (&refd
, op
);
1715 info
.bb_set
= BITMAP_ALLOC (NULL
);
1716 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1718 if (bitmap_bit_p (info
.bb_set
, bb
->index
))
1721 fprintf (dump_file
, " Set in same BB as used.\n");
1722 BITMAP_FREE (info
.bb_set
);
1723 return REG_BR_PROB_BASE
;
1728 /* Lookup the most frequent update of the value and believe that
1729 it dominates all the other; precise analysis here is difficult. */
1730 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
1731 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
1734 fprintf (dump_file
, " Set with count ");
1735 max
.dump (dump_file
);
1736 fprintf (dump_file
, " and used with count ");
1737 bb
->count
.dump (dump_file
);
1738 fprintf (dump_file
, " freq %f\n",
1739 max
.to_sreal_scale (bb
->count
).to_double ());
1742 BITMAP_FREE (info
.bb_set
);
1743 if (max
< bb
->count
)
1744 return MAX ((max
.to_sreal_scale (bb
->count
)
1745 * REG_BR_PROB_BASE
).to_int (), 1);
1746 return REG_BR_PROB_BASE
;
1750 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1751 sub-graph and if the predicate the condition depends on is known. If so,
1752 return true and store the pointer the predicate in *P. */
1755 phi_result_unknown_predicate (struct ipa_node_params
*info
,
1756 ipa_fn_summary
*summary
, basic_block bb
,
1758 vec
<predicate
> nonconstant_names
)
1762 basic_block first_bb
= NULL
;
1765 if (single_pred_p (bb
))
1771 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1773 if (single_succ_p (e
->src
))
1775 if (!single_pred_p (e
->src
))
1778 first_bb
= single_pred (e
->src
);
1779 else if (single_pred (e
->src
) != first_bb
)
1786 else if (e
->src
!= first_bb
)
1794 stmt
= last_stmt (first_bb
);
1796 || gimple_code (stmt
) != GIMPLE_COND
1797 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
1800 *p
= will_be_nonconstant_expr_predicate (info
, summary
,
1801 gimple_cond_lhs (stmt
),
1809 /* Given a PHI statement in a function described by inline properties SUMMARY
1810 and *P being the predicate describing whether the selected PHI argument is
1811 known, store a predicate for the result of the PHI statement into
1812 NONCONSTANT_NAMES, if possible. */
1815 predicate_for_phi_result (struct ipa_fn_summary
*summary
, gphi
*phi
,
1817 vec
<predicate
> nonconstant_names
)
1821 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
1823 tree arg
= gimple_phi_arg (phi
, i
)->def
;
1824 if (!is_gimple_min_invariant (arg
))
1826 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
1827 *p
= p
->or_with (summary
->conds
,
1828 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
1834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1836 fprintf (dump_file
, "\t\tphi predicate: ");
1837 p
->dump (dump_file
, summary
->conds
);
1839 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
1842 /* Return predicate specifying when array index in access OP becomes non-constant. */
1845 array_index_predicate (ipa_fn_summary
*info
,
1846 vec
< predicate
> nonconstant_names
, tree op
)
1848 predicate p
= false;
1849 while (handled_component_p (op
))
1851 if (TREE_CODE (op
) == ARRAY_REF
|| TREE_CODE (op
) == ARRAY_RANGE_REF
)
1853 if (TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
1854 p
= p
.or_with (info
->conds
,
1855 nonconstant_names
[SSA_NAME_VERSION
1856 (TREE_OPERAND (op
, 1))]);
1858 op
= TREE_OPERAND (op
, 0);
1863 /* For a typical usage of __builtin_expect (a<b, 1), we
1864 may introduce an extra relation stmt:
1865 With the builtin, we have
1868 t3 = __builtin_expect (t2, 1);
1871 Without the builtin, we have
1874 This affects the size/time estimation and may have
1875 an impact on the earlier inlining.
1876 Here find this pattern and fix it up later. */
1879 find_foldable_builtin_expect (basic_block bb
)
1881 gimple_stmt_iterator bsi
;
1883 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1885 gimple
*stmt
= gsi_stmt (bsi
);
1886 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
1887 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
1889 tree var
= gimple_call_lhs (stmt
);
1890 tree arg
= gimple_call_arg (stmt
, 0);
1891 use_operand_p use_p
;
1898 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
1900 while (TREE_CODE (arg
) == SSA_NAME
)
1902 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
1903 if (!is_gimple_assign (stmt_tmp
))
1905 switch (gimple_assign_rhs_code (stmt_tmp
))
1924 arg
= gimple_assign_rhs1 (stmt_tmp
);
1927 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
1928 && gimple_code (use_stmt
) == GIMPLE_COND
)
1935 /* Return true when the basic blocks contains only clobbers followed by RESX.
1936 Such BBs are kept around to make removal of dead stores possible with
1937 presence of EH and will be optimized out by optimize_clobbers later in the
1940 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1941 that can be clobber only, too.. When it is false, the RESX is not necessary
1942 on the end of basic block. */
1945 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
1947 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1953 if (gsi_end_p (gsi
))
1955 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
1959 else if (!single_succ_p (bb
))
1962 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
1964 gimple
*stmt
= gsi_stmt (gsi
);
1965 if (is_gimple_debug (stmt
))
1967 if (gimple_clobber_p (stmt
))
1969 if (gimple_code (stmt
) == GIMPLE_LABEL
)
1974 /* See if all predecestors are either throws or clobber only BBs. */
1975 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1976 if (!(e
->flags
& EDGE_EH
)
1977 && !clobber_only_eh_bb_p (e
->src
, false))
1983 /* Return true if STMT compute a floating point expression that may be affected
1984 by -ffast-math and similar flags. */
1987 fp_expression_p (gimple
*stmt
)
1992 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
1993 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
1998 /* Analyze function body for NODE.
1999 EARLY indicates run from early optimization pipeline. */
2002 analyze_function_body (struct cgraph_node
*node
, bool early
)
2005 /* Estimate static overhead for function prologue/epilogue and alignment. */
2007 /* Benefits are scaled by probability of elimination that is in range
2010 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2012 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
2013 predicate bb_predicate
;
2014 struct ipa_func_body_info fbi
;
2015 vec
<predicate
> nonconstant_names
= vNULL
;
2018 predicate array_index
= true;
2019 gimple
*fix_builtin_expect_stmt
;
2021 gcc_assert (my_function
&& my_function
->cfg
);
2022 gcc_assert (cfun
== my_function
);
2024 memset(&fbi
, 0, sizeof(fbi
));
2026 info
->size_time_table
= NULL
;
2028 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2029 so we can produce proper inline hints.
2031 When optimizing and analyzing for early inliner, initialize node params
2032 so we can produce correct BB predicates. */
2034 if (opt_for_fn (node
->decl
, optimize
))
2036 calculate_dominance_info (CDI_DOMINATORS
);
2038 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2041 ipa_check_create_node_params ();
2042 ipa_initialize_node_params (node
);
2045 if (ipa_node_params_sum
)
2048 fbi
.info
= IPA_NODE_REF (node
);
2049 fbi
.bb_infos
= vNULL
;
2050 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2051 fbi
.param_count
= count_formal_params(node
->decl
);
2052 nonconstant_names
.safe_grow_cleared
2053 (SSANAMES (my_function
)->length ());
2058 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2061 /* When we run into maximal number of entries, we assign everything to the
2062 constant truth case. Be sure to have it in list. */
2063 bb_predicate
= true;
2064 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2066 bb_predicate
= predicate::not_inlined ();
2067 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, bb_predicate
,
2071 compute_bb_predicates (&fbi
, node
, info
);
2072 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2073 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2074 for (n
= 0; n
< nblocks
; n
++)
2076 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2077 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2078 if (clobber_only_eh_bb_p (bb
))
2080 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2081 fprintf (dump_file
, "\n Ignoring BB %i;"
2082 " it will be optimized away by cleanup_clobbers\n",
2087 /* TODO: Obviously predicates can be propagated down across CFG. */
2091 bb_predicate
= *(predicate
*) bb
->aux
;
2093 bb_predicate
= false;
2096 bb_predicate
= true;
2098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2100 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2101 bb_predicate
.dump (dump_file
, info
->conds
);
2104 if (fbi
.info
&& nonconstant_names
.exists ())
2106 predicate phi_predicate
;
2107 bool first_phi
= true;
2109 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2113 && !phi_result_unknown_predicate (fbi
.info
, info
, bb
,
2118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2120 fprintf (dump_file
, " ");
2121 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2123 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2128 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2130 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
2133 gimple
*stmt
= gsi_stmt (bsi
);
2134 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2135 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2137 predicate will_be_nonconstant
;
2139 /* This relation stmt should be folded after we remove
2140 buildin_expect call. Adjust the cost here. */
2141 if (stmt
== fix_builtin_expect_stmt
)
2147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2149 fprintf (dump_file
, " ");
2150 print_gimple_stmt (dump_file
, stmt
, 0);
2151 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2152 freq
.to_double (), this_size
,
2156 if (gimple_assign_load_p (stmt
) && nonconstant_names
.exists ())
2158 predicate this_array_index
;
2160 array_index_predicate (info
, nonconstant_names
,
2161 gimple_assign_rhs1 (stmt
));
2162 if (this_array_index
!= false)
2163 array_index
&= this_array_index
;
2165 if (gimple_store_p (stmt
) && nonconstant_names
.exists ())
2167 predicate this_array_index
;
2169 array_index_predicate (info
, nonconstant_names
,
2170 gimple_get_lhs (stmt
));
2171 if (this_array_index
!= false)
2172 array_index
&= this_array_index
;
2176 if (is_gimple_call (stmt
)
2177 && !gimple_call_internal_p (stmt
))
2179 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2180 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2182 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2183 resolved as constant. We however don't want to optimize
2184 out the cgraph edges. */
2185 if (nonconstant_names
.exists ()
2186 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2187 && gimple_call_lhs (stmt
)
2188 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2190 predicate false_p
= false;
2191 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2194 if (ipa_node_params_sum
)
2196 int count
= gimple_call_num_args (stmt
);
2200 es
->param
.safe_grow_cleared (count
);
2201 for (i
= 0; i
< count
; i
++)
2203 int prob
= param_change_prob (stmt
, i
);
2204 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2205 es
->param
[i
].change_prob
= prob
;
2209 es
->call_stmt_size
= this_size
;
2210 es
->call_stmt_time
= this_time
;
2211 es
->loop_depth
= bb_loop_depth (bb
);
2212 edge_set_predicate (edge
, &bb_predicate
);
2215 /* TODO: When conditional jump or swithc is known to be constant, but
2216 we did not translate it into the predicates, we really can account
2217 just maximum of the possible paths. */
2220 = will_be_nonconstant_predicate (&fbi
, info
,
2221 stmt
, nonconstant_names
);
2223 will_be_nonconstant
= true;
2224 if (this_time
|| this_size
)
2226 sreal final_time
= (sreal
)this_time
* freq
;
2228 prob
= eliminated_by_inlining_prob (stmt
);
2229 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2231 "\t\t50%% will be eliminated by inlining\n");
2232 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2233 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2235 struct predicate p
= bb_predicate
& will_be_nonconstant
;
2237 /* We can ignore statement when we proved it is never going
2238 to happen, but we can not do that for call statements
2239 because edges are accounted specially. */
2241 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2247 /* We account everything but the calls. Calls have their own
2248 size/time info attached to cgraph edges. This is necessary
2249 in order to make the cost disappear after inlining. */
2250 if (!is_gimple_call (stmt
))
2254 predicate ip
= bb_predicate
& predicate::not_inlined ();
2255 info
->account_size_time (this_size
* prob
,
2256 (this_time
* prob
) / 2, ip
,
2260 info
->account_size_time (this_size
* (2 - prob
),
2261 (this_time
* (2 - prob
) / 2),
2266 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2268 info
->fp_expressions
= true;
2270 fprintf (dump_file
, " fp_expression set\n");
2273 gcc_assert (time
>= 0);
2274 gcc_assert (size
>= 0);
2278 set_hint_predicate (&ipa_fn_summaries
->get (node
)->array_index
, array_index
);
2281 if (nonconstant_names
.exists () && !early
)
2284 predicate loop_iterations
= true;
2285 predicate loop_stride
= true;
2287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2288 flow_loops_dump (dump_file
, NULL
, 0);
2290 FOR_EACH_LOOP (loop
, 0)
2295 struct tree_niter_desc niter_desc
;
2296 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2298 exits
= get_loop_exit_edges (loop
);
2299 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2300 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2301 && !is_gimple_min_invariant (niter_desc
.niter
))
2303 predicate will_be_nonconstant
2304 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2307 if (will_be_nonconstant
!= true)
2308 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2309 if (will_be_nonconstant
!= true
2310 && will_be_nonconstant
!= false)
2311 /* This is slightly inprecise. We may want to represent each
2312 loop with independent predicate. */
2313 loop_iterations
&= will_be_nonconstant
;
2318 /* To avoid quadratic behavior we analyze stride predicates only
2319 with respect to the containing loop. Thus we simply iterate
2320 over all defs in the outermost loop body. */
2321 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2322 loop
!= NULL
; loop
= loop
->next
)
2324 basic_block
*body
= get_loop_body (loop
);
2325 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2327 gimple_stmt_iterator gsi
;
2328 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2329 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2332 gimple
*stmt
= gsi_stmt (gsi
);
2334 if (!is_gimple_assign (stmt
))
2337 tree def
= gimple_assign_lhs (stmt
);
2338 if (TREE_CODE (def
) != SSA_NAME
)
2342 if (!simple_iv (loop_containing_stmt (stmt
),
2343 loop_containing_stmt (stmt
),
2345 || is_gimple_min_invariant (iv
.step
))
2348 predicate will_be_nonconstant
2349 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2352 if (will_be_nonconstant
!= true)
2353 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2354 if (will_be_nonconstant
!= true
2355 && will_be_nonconstant
!= false)
2356 /* This is slightly inprecise. We may want to represent
2357 each loop with independent predicate. */
2358 loop_stride
= loop_stride
& will_be_nonconstant
;
2363 set_hint_predicate (&ipa_fn_summaries
->get (node
)->loop_iterations
,
2365 set_hint_predicate (&ipa_fn_summaries
->get (node
)->loop_stride
,
2369 FOR_ALL_BB_FN (bb
, my_function
)
2375 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2377 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2380 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2384 ipa_fn_summaries
->get (node
)->time
= time
;
2385 ipa_fn_summaries
->get (node
)->self_size
= size
;
2386 nonconstant_names
.release ();
2387 ipa_release_body_info (&fbi
);
2388 if (opt_for_fn (node
->decl
, optimize
))
2391 loop_optimizer_finalize ();
2392 else if (!ipa_edge_args_sum
)
2393 ipa_free_all_node_params ();
2394 free_dominance_info (CDI_DOMINATORS
);
2398 fprintf (dump_file
, "\n");
2399 ipa_dump_fn_summary (dump_file
, node
);
2404 /* Compute function summary.
2405 EARLY is true when we compute parameters during early opts. */
2408 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2410 HOST_WIDE_INT self_stack_size
;
2411 struct cgraph_edge
*e
;
2412 struct ipa_fn_summary
*info
;
2414 gcc_assert (!node
->global
.inlined_to
);
2416 if (!ipa_fn_summaries
)
2417 ipa_fn_summary_alloc ();
2419 info
= ipa_fn_summaries
->get (node
);
2422 /* Estimate the stack size for the function if we're optimizing. */
2423 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2424 ? estimated_stack_frame_size (node
) : 0;
2425 info
->estimated_self_stack_size
= self_stack_size
;
2426 info
->estimated_stack_size
= self_stack_size
;
2427 info
->stack_frame_offset
= 0;
2429 if (node
->thunk
.thunk_p
)
2431 struct ipa_call_summary
*es
= ipa_call_summaries
->get (node
->callees
);
2434 node
->local
.can_change_signature
= false;
2435 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2436 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2437 info
->account_size_time (ipa_fn_summary::size_scale
* 2, 2, t
, t
);
2438 t
= predicate::not_inlined ();
2439 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2440 ipa_update_overall_fn_summary (node
);
2441 info
->self_size
= info
->size
;
2442 /* We can not inline instrumentation clones. */
2443 if (node
->thunk
.add_pointer_bounds_args
)
2445 info
->inlinable
= false;
2446 node
->callees
->inline_failed
= CIF_CHKP
;
2448 else if (stdarg_p (TREE_TYPE (node
->decl
)))
2450 info
->inlinable
= false;
2451 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2454 info
->inlinable
= true;
2458 /* Even is_gimple_min_invariant rely on current_function_decl. */
2459 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2461 /* Can this function be inlined at all? */
2462 if (!opt_for_fn (node
->decl
, optimize
)
2463 && !lookup_attribute ("always_inline",
2464 DECL_ATTRIBUTES (node
->decl
)))
2465 info
->inlinable
= false;
2467 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2469 /* Type attributes can use parameter indices to describe them. */
2470 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2471 /* Likewise for #pragma omp declare simd functions or functions
2472 with simd attribute. */
2473 || lookup_attribute ("omp declare simd",
2474 DECL_ATTRIBUTES (node
->decl
)))
2475 node
->local
.can_change_signature
= false;
2478 /* Otherwise, inlinable functions always can change signature. */
2479 if (info
->inlinable
)
2480 node
->local
.can_change_signature
= true;
2483 /* Functions calling builtin_apply can not change signature. */
2484 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2486 tree
cdecl = e
->callee
->decl
;
2487 if (DECL_BUILT_IN (cdecl)
2488 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2489 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2490 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START
))
2493 node
->local
.can_change_signature
= !e
;
2496 /* Functions called by instrumentation thunk can't change signature
2497 because instrumentation thunk modification is not supported. */
2498 if (node
->local
.can_change_signature
)
2499 for (e
= node
->callers
; e
; e
= e
->next_caller
)
2500 if (e
->caller
->thunk
.thunk_p
2501 && e
->caller
->thunk
.add_pointer_bounds_args
)
2503 node
->local
.can_change_signature
= false;
2506 analyze_function_body (node
, early
);
2509 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2510 if (e
->callee
->comdat_local_p ())
2512 node
->calls_comdat_local
= (e
!= NULL
);
2514 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2515 info
->size
= info
->self_size
;
2516 info
->stack_frame_offset
= 0;
2517 info
->estimated_stack_size
= info
->estimated_self_stack_size
;
2519 /* Code above should compute exactly the same result as
2520 ipa_update_overall_fn_summary but because computation happens in
2521 different order the roundoff errors result in slight changes. */
2522 ipa_update_overall_fn_summary (node
);
2523 gcc_assert (info
->size
== info
->self_size
);
2527 /* Compute parameters of functions used by inliner using
2528 current_function_decl. */
2531 compute_fn_summary_for_current (void)
2533 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2537 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2538 KNOWN_CONTEXTS and KNOWN_AGGS. */
2541 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2542 int *size
, int *time
,
2543 vec
<tree
> known_vals
,
2544 vec
<ipa_polymorphic_call_context
> known_contexts
,
2545 vec
<ipa_agg_jump_function_p
> known_aggs
)
2548 struct cgraph_node
*callee
;
2549 struct ipa_fn_summary
*isummary
;
2550 enum availability avail
;
2553 if (!known_vals
.exists () && !known_contexts
.exists ())
2555 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2558 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2559 known_aggs
, &speculative
);
2560 if (!target
|| speculative
)
2563 /* Account for difference in cost between indirect and direct calls. */
2564 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2565 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2566 gcc_checking_assert (*time
>= 0);
2567 gcc_checking_assert (*size
>= 0);
2569 callee
= cgraph_node::get (target
);
2570 if (!callee
|| !callee
->definition
)
2572 callee
= callee
->function_symbol (&avail
);
2573 if (avail
< AVAIL_AVAILABLE
)
2575 isummary
= ipa_fn_summaries
->get (callee
);
2576 return isummary
->inlinable
;
2579 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2580 handle edge E with probability PROB.
2581 Set HINTS if edge may be devirtualized.
2582 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2586 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2589 vec
<tree
> known_vals
,
2590 vec
<ipa_polymorphic_call_context
> known_contexts
,
2591 vec
<ipa_agg_jump_function_p
> known_aggs
,
2594 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2595 int call_size
= es
->call_stmt_size
;
2596 int call_time
= es
->call_stmt_time
;
2599 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2600 known_vals
, known_contexts
, known_aggs
)
2601 && hints
&& e
->maybe_hot_p ())
2602 *hints
|= INLINE_HINT_indirect_call
;
2603 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2606 *min_size
+= cur_size
;
2607 if (prob
== REG_BR_PROB_BASE
)
2608 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2610 *time
+= ((sreal
)call_time
* prob
) * e
->sreal_frequency ();
2615 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2616 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2617 describe context of the call site. */
2620 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2621 int *min_size
, sreal
*time
,
2623 clause_t possible_truths
,
2624 vec
<tree
> known_vals
,
2625 vec
<ipa_polymorphic_call_context
> known_contexts
,
2626 vec
<ipa_agg_jump_function_p
> known_aggs
)
2628 struct cgraph_edge
*e
;
2629 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2631 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2633 /* Do not care about zero sized builtins. */
2634 if (e
->inline_failed
&& !es
->call_stmt_size
)
2636 gcc_checking_assert (!es
->call_stmt_time
);
2640 || es
->predicate
->evaluate (possible_truths
))
2642 if (e
->inline_failed
)
2644 /* Predicates of calls shall not use NOT_CHANGED codes,
2645 sowe do not need to compute probabilities. */
2646 estimate_edge_size_and_time (e
, size
,
2647 es
->predicate
? NULL
: min_size
,
2648 time
, REG_BR_PROB_BASE
,
2649 known_vals
, known_contexts
,
2653 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2656 known_vals
, known_contexts
,
2660 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2662 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2664 || es
->predicate
->evaluate (possible_truths
))
2665 estimate_edge_size_and_time (e
, size
,
2666 es
->predicate
? NULL
: min_size
,
2667 time
, REG_BR_PROB_BASE
,
2668 known_vals
, known_contexts
, known_aggs
,
2674 /* Estimate size and time needed to execute NODE assuming
2675 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2676 information about NODE's arguments. If non-NULL use also probability
2677 information present in INLINE_PARAM_SUMMARY vector.
2678 Additionally detemine hints determined by the context. Finally compute
2679 minimal size needed for the call that is independent on the call context and
2680 can be used for fast estimates. Return the values in RET_SIZE,
2681 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2684 estimate_node_size_and_time (struct cgraph_node
*node
,
2685 clause_t possible_truths
,
2686 clause_t nonspec_possible_truths
,
2687 vec
<tree
> known_vals
,
2688 vec
<ipa_polymorphic_call_context
> known_contexts
,
2689 vec
<ipa_agg_jump_function_p
> known_aggs
,
2690 int *ret_size
, int *ret_min_size
,
2692 sreal
*ret_nonspecialized_time
,
2693 ipa_hints
*ret_hints
,
2694 vec
<inline_param_summary
>
2695 inline_param_summary
)
2697 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
2702 ipa_hints hints
= 0;
2705 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2708 fprintf (dump_file
, " Estimating body: %s/%i\n"
2709 " Known to be false: ", node
->name (),
2712 for (i
= predicate::not_inlined_condition
;
2713 i
< (predicate::first_dynamic_condition
2714 + (int) vec_safe_length (info
->conds
)); i
++)
2715 if (!(possible_truths
& (1 << i
)))
2718 fprintf (dump_file
, ", ");
2720 dump_condition (dump_file
, info
->conds
, i
);
2724 estimate_calls_size_and_time (node
, &size
, &min_size
, &time
, &hints
, possible_truths
,
2725 known_vals
, known_contexts
, known_aggs
);
2726 sreal nonspecialized_time
= time
;
2728 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
2730 bool exec
= e
->exec_predicate
.evaluate (nonspec_possible_truths
);
2732 /* Because predicates are conservative, it can happen that nonconst is 1
2736 bool nonconst
= e
->nonconst_predicate
.evaluate (possible_truths
);
2738 gcc_checking_assert (e
->time
>= 0);
2739 gcc_checking_assert (time
>= 0);
2741 /* We compute specialized size only because size of nonspecialized
2742 copy is context independent.
2744 The difference between nonspecialized execution and specialized is
2745 that nonspecialized is not going to have optimized out computations
2746 known to be constant in a specialized setting. */
2749 nonspecialized_time
+= e
->time
;
2752 else if (!inline_param_summary
.exists ())
2759 int prob
= e
->nonconst_predicate
.probability
2760 (info
->conds
, possible_truths
,
2761 inline_param_summary
);
2762 gcc_checking_assert (prob
>= 0);
2763 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
2764 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
2766 gcc_checking_assert (time
>= 0);
2769 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
2770 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
2771 min_size
= (*info
->size_time_table
)[0].size
;
2772 gcc_checking_assert (size
>= 0);
2773 gcc_checking_assert (time
>= 0);
2774 /* nonspecialized_time should be always bigger than specialized time.
2775 Roundoff issues however may get into the way. */
2776 gcc_checking_assert ((nonspecialized_time
- time
* 0.99) >= -1);
2778 /* Roundoff issues may make specialized time bigger than nonspecialized
2779 time. We do not really want that to happen because some heurstics
2780 may get confused by seeing negative speedups. */
2781 if (time
> nonspecialized_time
)
2782 time
= nonspecialized_time
;
2784 if (info
->loop_iterations
2785 && !info
->loop_iterations
->evaluate (possible_truths
))
2786 hints
|= INLINE_HINT_loop_iterations
;
2787 if (info
->loop_stride
2788 && !info
->loop_stride
->evaluate (possible_truths
))
2789 hints
|= INLINE_HINT_loop_stride
;
2790 if (info
->array_index
2791 && !info
->array_index
->evaluate (possible_truths
))
2792 hints
|= INLINE_HINT_array_index
;
2794 hints
|= INLINE_HINT_in_scc
;
2795 if (DECL_DECLARED_INLINE_P (node
->decl
))
2796 hints
|= INLINE_HINT_declared_inline
;
2798 size
= RDIV (size
, ipa_fn_summary::size_scale
);
2799 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
2801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2802 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
2803 time
.to_double (), nonspecialized_time
.to_double ());
2806 if (ret_nonspecialized_time
)
2807 *ret_nonspecialized_time
= nonspecialized_time
;
2811 *ret_min_size
= min_size
;
2818 /* Estimate size and time needed to execute callee of EDGE assuming that
2819 parameters known to be constant at caller of EDGE are propagated.
2820 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2821 and types for parameters. */
2824 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
2825 vec
<tree
> known_vals
,
2826 vec
<ipa_polymorphic_call_context
>
2828 vec
<ipa_agg_jump_function_p
> known_aggs
,
2829 int *ret_size
, sreal
*ret_time
,
2830 sreal
*ret_nonspec_time
,
2833 clause_t clause
, nonspec_clause
;
2835 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
2836 &clause
, &nonspec_clause
);
2837 estimate_node_size_and_time (node
, clause
, nonspec_clause
,
2838 known_vals
, known_contexts
,
2839 known_aggs
, ret_size
, NULL
, ret_time
,
2840 ret_nonspec_time
, hints
, vNULL
);
2844 /* Update summary information of inline clones after inlining.
2845 Compute peak stack usage. */
2848 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
2850 struct cgraph_edge
*e
;
2851 struct ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (node
);
2852 struct ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (node
->callers
->caller
);
2855 callee_info
->stack_frame_offset
2856 = caller_info
->stack_frame_offset
2857 + caller_info
->estimated_self_stack_size
;
2858 peak
= callee_info
->stack_frame_offset
2859 + callee_info
->estimated_self_stack_size
;
2860 if (ipa_fn_summaries
->get (node
->global
.inlined_to
)->estimated_stack_size
< peak
)
2861 ipa_fn_summaries
->get (node
->global
.inlined_to
)->estimated_stack_size
= peak
;
2862 ipa_propagate_frequency (node
);
2863 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2865 if (!e
->inline_failed
)
2866 inline_update_callee_summaries (e
->callee
, depth
);
2867 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2869 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2870 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2873 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2874 When functoin A is inlined in B and A calls C with parameter that
2875 changes with probability PROB1 and C is known to be passthroug
2876 of argument if B that change with probability PROB2, the probability
2877 of change is now PROB1*PROB2. */
2880 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
2881 struct cgraph_edge
*edge
)
2883 if (ipa_node_params_sum
)
2886 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2887 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2888 struct ipa_call_summary
*inlined_es
2889 = ipa_call_summaries
->get (inlined_edge
);
2891 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
2893 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
2894 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2895 || jfunc
->type
== IPA_JF_ANCESTOR
)
2897 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
2898 ? ipa_get_jf_pass_through_formal_id (jfunc
)
2899 : ipa_get_jf_ancestor_formal_id (jfunc
);
2900 if (id
< (int) inlined_es
->param
.length ())
2902 int prob1
= es
->param
[i
].change_prob
;
2903 int prob2
= inlined_es
->param
[id
].change_prob
;
2904 int prob
= combine_probabilities (prob1
, prob2
);
2906 if (prob1
&& prob2
&& !prob
)
2909 es
->param
[i
].change_prob
= prob
;
2916 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2918 Remap predicates of callees of NODE. Rest of arguments match
2921 Also update change probabilities. */
2924 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
2925 struct cgraph_node
*node
,
2926 struct ipa_fn_summary
*info
,
2927 struct ipa_fn_summary
*callee_info
,
2928 vec
<int> operand_map
,
2929 vec
<int> offset_map
,
2930 clause_t possible_truths
,
2931 predicate
*toplev_predicate
)
2933 struct cgraph_edge
*e
, *next
;
2934 for (e
= node
->callees
; e
; e
= next
)
2936 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2938 next
= e
->next_callee
;
2940 if (e
->inline_failed
)
2942 remap_edge_change_prob (inlined_edge
, e
);
2946 p
= es
->predicate
->remap_after_inlining
2947 (info
, callee_info
, operand_map
,
2948 offset_map
, possible_truths
,
2950 edge_set_predicate (e
, &p
);
2953 edge_set_predicate (e
, toplev_predicate
);
2956 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
2957 operand_map
, offset_map
, possible_truths
,
2960 for (e
= node
->indirect_calls
; e
; e
= next
)
2962 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2964 next
= e
->next_callee
;
2966 remap_edge_change_prob (inlined_edge
, e
);
2969 p
= es
->predicate
->remap_after_inlining
2970 (info
, callee_info
, operand_map
, offset_map
,
2971 possible_truths
, *toplev_predicate
);
2972 edge_set_predicate (e
, &p
);
2975 edge_set_predicate (e
, toplev_predicate
);
2979 /* Same as remap_predicate, but set result into hint *HINT. */
2982 remap_hint_predicate (struct ipa_fn_summary
*info
,
2983 struct ipa_fn_summary
*callee_info
,
2985 vec
<int> operand_map
,
2986 vec
<int> offset_map
,
2987 clause_t possible_truths
,
2988 predicate
*toplev_predicate
)
2994 p
= (*hint
)->remap_after_inlining
2996 operand_map
, offset_map
,
2997 possible_truths
, *toplev_predicate
);
2998 if (p
!= false && p
!= true)
3001 set_hint_predicate (hint
, p
);
3007 /* We inlined EDGE. Update summary of the function we inlined into. */
3010 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
3012 struct ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
3013 struct cgraph_node
*to
= (edge
->caller
->global
.inlined_to
3014 ? edge
->caller
->global
.inlined_to
: edge
->caller
);
3015 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
3016 clause_t clause
= 0; /* not_inline is known to be false. */
3018 vec
<int> operand_map
= vNULL
;
3019 vec
<int> offset_map
= vNULL
;
3021 predicate toplev_predicate
;
3022 predicate true_p
= true;
3023 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3026 toplev_predicate
= *es
->predicate
;
3028 toplev_predicate
= true;
3030 info
->fp_expressions
|= callee_info
->fp_expressions
;
3032 if (callee_info
->conds
)
3033 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
3034 if (ipa_node_params_sum
&& callee_info
->conds
)
3036 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3037 int count
= ipa_get_cs_argument_count (args
);
3042 operand_map
.safe_grow_cleared (count
);
3043 offset_map
.safe_grow_cleared (count
);
3045 for (i
= 0; i
< count
; i
++)
3047 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3050 /* TODO: handle non-NOPs when merging. */
3051 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3053 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3054 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3055 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3058 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3060 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3061 if (offset
>= 0 && offset
< INT_MAX
)
3063 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3064 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3066 offset_map
[i
] = offset
;
3069 operand_map
[i
] = map
;
3070 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
3073 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3076 p
= e
->exec_predicate
.remap_after_inlining
3077 (info
, callee_info
, operand_map
,
3080 predicate nonconstp
;
3081 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3082 (info
, callee_info
, operand_map
,
3085 if (p
!= false && nonconstp
!= false)
3087 sreal add_time
= ((sreal
)e
->time
* edge
->sreal_frequency ());
3088 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3090 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3091 if (prob
!= REG_BR_PROB_BASE
3092 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3094 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3095 (double) prob
/ REG_BR_PROB_BASE
);
3097 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3100 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3101 offset_map
, clause
, &toplev_predicate
);
3102 remap_hint_predicate (info
, callee_info
,
3103 &callee_info
->loop_iterations
,
3104 operand_map
, offset_map
, clause
, &toplev_predicate
);
3105 remap_hint_predicate (info
, callee_info
,
3106 &callee_info
->loop_stride
,
3107 operand_map
, offset_map
, clause
, &toplev_predicate
);
3108 remap_hint_predicate (info
, callee_info
,
3109 &callee_info
->array_index
,
3110 operand_map
, offset_map
, clause
, &toplev_predicate
);
3112 inline_update_callee_summaries (edge
->callee
,
3113 ipa_call_summaries
->get (edge
)->loop_depth
);
3115 /* We do not maintain predicates of inlined edges, free it. */
3116 edge_set_predicate (edge
, &true_p
);
3117 /* Similarly remove param summaries. */
3118 es
->param
.release ();
3119 operand_map
.release ();
3120 offset_map
.release ();
3123 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3124 and time. Recompute it. */
3127 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3129 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
3135 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3137 info
->size
+= e
->size
;
3138 info
->time
+= e
->time
;
3140 estimate_calls_size_and_time (node
, &info
->size
, &info
->min_size
,
3142 ~(clause_t
) (1 << predicate::false_condition
),
3143 vNULL
, vNULL
, vNULL
);
3144 info
->size
= (info
->size
+ ipa_fn_summary::size_scale
/ 2) / ipa_fn_summary::size_scale
;
3148 /* This function performs intraprocedural analysis in NODE that is required to
3149 inline indirect calls. */
3152 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3154 ipa_analyze_node (node
);
3155 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3157 ipa_print_node_params (dump_file
, node
);
3158 ipa_print_node_jump_functions (dump_file
, node
);
3163 /* Note function body size. */
3166 inline_analyze_function (struct cgraph_node
*node
)
3168 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3171 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3172 node
->name (), node
->order
);
3173 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3174 inline_indirect_intraprocedural_analysis (node
);
3175 compute_fn_summary (node
, false);
3178 struct cgraph_edge
*e
;
3179 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3180 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3181 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3182 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3189 /* Called when new function is inserted to callgraph late. */
3192 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3194 inline_analyze_function (node
);
3197 /* Note function body size. */
3200 ipa_fn_summary_generate (void)
3202 struct cgraph_node
*node
;
3204 FOR_EACH_DEFINED_FUNCTION (node
)
3205 if (DECL_STRUCT_FUNCTION (node
->decl
))
3206 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
3208 ipa_fn_summary_alloc ();
3210 ipa_fn_summaries
->enable_insertion_hook ();
3212 ipa_register_cgraph_hooks ();
3214 FOR_EACH_DEFINED_FUNCTION (node
)
3216 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3217 || opt_for_fn (node
->decl
, optimize
)))
3218 inline_analyze_function (node
);
3222 /* Write inline summary for edge E to OB. */
3225 read_ipa_call_summary (struct lto_input_block
*ib
, struct cgraph_edge
*e
)
3227 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3231 es
->call_stmt_size
= streamer_read_uhwi (ib
);
3232 es
->call_stmt_time
= streamer_read_uhwi (ib
);
3233 es
->loop_depth
= streamer_read_uhwi (ib
);
3235 bitpack_d bp
= streamer_read_bitpack (ib
);
3236 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3239 edge_set_predicate (e
, &p
);
3240 length
= streamer_read_uhwi (ib
);
3243 es
->param
.safe_grow_cleared (length
);
3244 for (i
= 0; i
< length
; i
++)
3245 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3250 /* Stream in inline summaries from the section. */
3253 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3256 const struct lto_function_header
*header
=
3257 (const struct lto_function_header
*) data
;
3258 const int cfg_offset
= sizeof (struct lto_function_header
);
3259 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3260 const int string_offset
= main_offset
+ header
->main_size
;
3261 struct data_in
*data_in
;
3262 unsigned int i
, count2
, j
;
3263 unsigned int f_count
;
3265 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3266 file_data
->mode_table
);
3269 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3270 header
->string_size
, vNULL
);
3271 f_count
= streamer_read_uhwi (&ib
);
3272 for (i
= 0; i
< f_count
; i
++)
3275 struct cgraph_node
*node
;
3276 struct ipa_fn_summary
*info
;
3277 lto_symtab_encoder_t encoder
;
3278 struct bitpack_d bp
;
3279 struct cgraph_edge
*e
;
3282 index
= streamer_read_uhwi (&ib
);
3283 encoder
= file_data
->symtab_node_encoder
;
3284 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3286 info
= ipa_fn_summaries
->get (node
);
3288 info
->estimated_stack_size
3289 = info
->estimated_self_stack_size
= streamer_read_uhwi (&ib
);
3290 info
->size
= info
->self_size
= streamer_read_uhwi (&ib
);
3291 info
->time
= sreal::stream_in (&ib
);
3293 bp
= streamer_read_bitpack (&ib
);
3294 info
->inlinable
= bp_unpack_value (&bp
, 1);
3295 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3297 count2
= streamer_read_uhwi (&ib
);
3298 gcc_assert (!info
->conds
);
3299 for (j
= 0; j
< count2
; j
++)
3302 c
.operand_num
= streamer_read_uhwi (&ib
);
3303 c
.size
= streamer_read_uhwi (&ib
);
3304 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3305 c
.val
= stream_read_tree (&ib
, data_in
);
3306 bp
= streamer_read_bitpack (&ib
);
3307 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3308 c
.by_ref
= bp_unpack_value (&bp
, 1);
3310 c
.offset
= streamer_read_uhwi (&ib
);
3311 vec_safe_push (info
->conds
, c
);
3313 count2
= streamer_read_uhwi (&ib
);
3314 gcc_assert (!info
->size_time_table
);
3315 for (j
= 0; j
< count2
; j
++)
3317 struct size_time_entry e
;
3319 e
.size
= streamer_read_uhwi (&ib
);
3320 e
.time
= sreal::stream_in (&ib
);
3321 e
.exec_predicate
.stream_in (&ib
);
3322 e
.nonconst_predicate
.stream_in (&ib
);
3324 vec_safe_push (info
->size_time_table
, e
);
3328 set_hint_predicate (&info
->loop_iterations
, p
);
3330 set_hint_predicate (&info
->loop_stride
, p
);
3332 set_hint_predicate (&info
->array_index
, p
);
3333 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3334 read_ipa_call_summary (&ib
, e
);
3335 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3336 read_ipa_call_summary (&ib
, e
);
3339 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3341 lto_data_in_delete (data_in
);
3345 /* Read inline summary. Jump functions are shared among ipa-cp
3346 and inliner, so when ipa-cp is active, we don't need to write them
3350 ipa_fn_summary_read (void)
3352 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3353 struct lto_file_decl_data
*file_data
;
3356 ipa_fn_summary_alloc ();
3358 while ((file_data
= file_data_vec
[j
++]))
3361 const char *data
= lto_get_section_data (file_data
,
3362 LTO_section_ipa_fn_summary
,
3365 inline_read_section (file_data
, data
, len
);
3367 /* Fatal error here. We do not want to support compiling ltrans units
3368 with different version of compiler or different flags than the WPA
3369 unit, so this should never happen. */
3370 fatal_error (input_location
,
3371 "ipa inline summary is missing in input file");
3373 ipa_register_cgraph_hooks ();
3375 ipa_prop_read_jump_functions ();
3377 gcc_assert (ipa_fn_summaries
);
3378 ipa_fn_summaries
->enable_insertion_hook ();
3382 /* Write inline summary for edge E to OB. */
3385 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3387 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3390 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3391 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3392 streamer_write_uhwi (ob
, es
->loop_depth
);
3394 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3395 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3396 streamer_write_bitpack (&bp
);
3399 es
->predicate
->stream_out (ob
);
3401 streamer_write_uhwi (ob
, 0);
3402 streamer_write_uhwi (ob
, es
->param
.length ());
3403 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3404 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3408 /* Write inline summary for node in SET.
3409 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3410 active, we don't need to write them twice. */
3413 ipa_fn_summary_write (void)
3415 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3416 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3417 unsigned int count
= 0;
3420 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3422 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3423 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3424 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3427 streamer_write_uhwi (ob
, count
);
3429 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3431 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3432 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3433 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3435 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3436 struct bitpack_d bp
;
3437 struct cgraph_edge
*edge
;
3440 struct condition
*c
;
3442 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3443 streamer_write_hwi (ob
, info
->estimated_self_stack_size
);
3444 streamer_write_hwi (ob
, info
->self_size
);
3445 info
->time
.stream_out (ob
);
3446 bp
= bitpack_create (ob
->main_stream
);
3447 bp_pack_value (&bp
, info
->inlinable
, 1);
3448 bp_pack_value (&bp
, false, 1);
3449 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3450 streamer_write_bitpack (&bp
);
3451 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3452 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3454 streamer_write_uhwi (ob
, c
->operand_num
);
3455 streamer_write_uhwi (ob
, c
->size
);
3456 streamer_write_uhwi (ob
, c
->code
);
3457 stream_write_tree (ob
, c
->val
, true);
3458 bp
= bitpack_create (ob
->main_stream
);
3459 bp_pack_value (&bp
, c
->agg_contents
, 1);
3460 bp_pack_value (&bp
, c
->by_ref
, 1);
3461 streamer_write_bitpack (&bp
);
3462 if (c
->agg_contents
)
3463 streamer_write_uhwi (ob
, c
->offset
);
3465 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3466 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3468 streamer_write_uhwi (ob
, e
->size
);
3469 e
->time
.stream_out (ob
);
3470 e
->exec_predicate
.stream_out (ob
);
3471 e
->nonconst_predicate
.stream_out (ob
);
3473 if (info
->loop_iterations
)
3474 info
->loop_iterations
->stream_out (ob
);
3476 streamer_write_uhwi (ob
, 0);
3477 if (info
->loop_stride
)
3478 info
->loop_stride
->stream_out (ob
);
3480 streamer_write_uhwi (ob
, 0);
3481 if (info
->array_index
)
3482 info
->array_index
->stream_out (ob
);
3484 streamer_write_uhwi (ob
, 0);
3485 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3486 write_ipa_call_summary (ob
, edge
);
3487 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3488 write_ipa_call_summary (ob
, edge
);
3491 streamer_write_char_stream (ob
->main_stream
, 0);
3492 produce_asm (ob
, NULL
);
3493 destroy_output_block (ob
);
3496 ipa_prop_write_jump_functions ();
3500 /* Release inline summary. */
3503 ipa_free_fn_summary (void)
3505 struct cgraph_node
*node
;
3506 if (!ipa_call_summaries
)
3508 FOR_EACH_DEFINED_FUNCTION (node
)
3510 ipa_fn_summaries
->get (node
)->reset (node
);
3511 ipa_fn_summaries
->release ();
3512 ipa_fn_summaries
= NULL
;
3513 ipa_call_summaries
->release ();
3514 delete ipa_call_summaries
;
3515 ipa_call_summaries
= NULL
;
3516 edge_predicate_pool
.release ();
3521 const pass_data pass_data_local_fn_summary
=
3523 GIMPLE_PASS
, /* type */
3524 "local-fnsummary", /* name */
3525 OPTGROUP_INLINE
, /* optinfo_flags */
3526 TV_INLINE_PARAMETERS
, /* tv_id */
3527 0, /* properties_required */
3528 0, /* properties_provided */
3529 0, /* properties_destroyed */
3530 0, /* todo_flags_start */
3531 0, /* todo_flags_finish */
3534 class pass_local_fn_summary
: public gimple_opt_pass
3537 pass_local_fn_summary (gcc::context
*ctxt
)
3538 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3541 /* opt_pass methods: */
3542 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3543 virtual unsigned int execute (function
*)
3545 return compute_fn_summary_for_current ();
3548 }; // class pass_local_fn_summary
3553 make_pass_local_fn_summary (gcc::context
*ctxt
)
3555 return new pass_local_fn_summary (ctxt
);
3559 /* Free inline summary. */
3563 const pass_data pass_data_ipa_free_fn_summary
=
3565 SIMPLE_IPA_PASS
, /* type */
3566 "free-fnsummary", /* name */
3567 OPTGROUP_NONE
, /* optinfo_flags */
3568 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3569 0, /* properties_required */
3570 0, /* properties_provided */
3571 0, /* properties_destroyed */
3572 0, /* todo_flags_start */
3573 0, /* todo_flags_finish */
3576 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3579 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3580 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
3584 /* opt_pass methods: */
3585 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
3586 void set_pass_param (unsigned int n
, bool param
)
3588 gcc_assert (n
== 0);
3591 virtual bool gate (function
*) { return small_p
|| !flag_wpa
; }
3592 virtual unsigned int execute (function
*)
3594 ipa_free_fn_summary ();
3595 /* Early optimizations may make function unreachable. We can not
3596 remove unreachable functions as part of the early opts pass because
3597 TODOs are run before subpasses. Do it here. */
3598 return small_p
? TODO_remove_functions
| TODO_dump_symtab
: 0;
3603 }; // class pass_ipa_free_fn_summary
3607 simple_ipa_opt_pass
*
3608 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3610 return new pass_ipa_free_fn_summary (ctxt
);
3615 const pass_data pass_data_ipa_fn_summary
=
3617 IPA_PASS
, /* type */
3618 "fnsummary", /* name */
3619 OPTGROUP_INLINE
, /* optinfo_flags */
3620 TV_IPA_FNSUMMARY
, /* tv_id */
3621 0, /* properties_required */
3622 0, /* properties_provided */
3623 0, /* properties_destroyed */
3624 0, /* todo_flags_start */
3625 ( TODO_dump_symtab
), /* todo_flags_finish */
3628 class pass_ipa_fn_summary
: public ipa_opt_pass_d
3631 pass_ipa_fn_summary (gcc::context
*ctxt
)
3632 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
3633 ipa_fn_summary_generate
, /* generate_summary */
3634 ipa_fn_summary_write
, /* write_summary */
3635 ipa_fn_summary_read
, /* read_summary */
3636 NULL
, /* write_optimization_summary */
3637 NULL
, /* read_optimization_summary */
3638 NULL
, /* stmt_fixup */
3639 0, /* function_transform_todo_flags_start */
3640 NULL
, /* function_transform */
3641 NULL
) /* variable_transform */
3644 /* opt_pass methods: */
3645 virtual unsigned int execute (function
*) { return 0; }
3647 }; // class pass_ipa_fn_summary
3652 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
3654 return new pass_ipa_fn_summary (ctxt
);
3657 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3658 within the same process. For use by toplev::finalize. */
3661 ipa_fnsummary_c_finalize (void)
3663 ipa_free_fn_summary ();