1 /* Function summary pass.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
83 #include "cfgexpand.h"
85 #include "stringpool.h"
89 function_summary
<ipa_fn_summary
*> *ipa_fn_summaries
;
90 call_summary
<ipa_call_summary
*> *ipa_call_summaries
;
92 /* Edge predicates goes here. */
93 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
98 ipa_dump_hints (FILE *f
, ipa_hints hints
)
102 fprintf (f
, "IPA hints:");
103 if (hints
& INLINE_HINT_indirect_call
)
105 hints
&= ~INLINE_HINT_indirect_call
;
106 fprintf (f
, " indirect_call");
108 if (hints
& INLINE_HINT_loop_iterations
)
110 hints
&= ~INLINE_HINT_loop_iterations
;
111 fprintf (f
, " loop_iterations");
113 if (hints
& INLINE_HINT_loop_stride
)
115 hints
&= ~INLINE_HINT_loop_stride
;
116 fprintf (f
, " loop_stride");
118 if (hints
& INLINE_HINT_same_scc
)
120 hints
&= ~INLINE_HINT_same_scc
;
121 fprintf (f
, " same_scc");
123 if (hints
& INLINE_HINT_in_scc
)
125 hints
&= ~INLINE_HINT_in_scc
;
126 fprintf (f
, " in_scc");
128 if (hints
& INLINE_HINT_cross_module
)
130 hints
&= ~INLINE_HINT_cross_module
;
131 fprintf (f
, " cross_module");
133 if (hints
& INLINE_HINT_declared_inline
)
135 hints
&= ~INLINE_HINT_declared_inline
;
136 fprintf (f
, " declared_inline");
138 if (hints
& INLINE_HINT_array_index
)
140 hints
&= ~INLINE_HINT_array_index
;
141 fprintf (f
, " array_index");
143 if (hints
& INLINE_HINT_known_hot
)
145 hints
&= ~INLINE_HINT_known_hot
;
146 fprintf (f
, " known_hot");
152 /* Record SIZE and TIME to SUMMARY.
153 The accounted code will be executed when EXEC_PRED is true.
154 When NONCONST_PRED is false the code will evaulate to constant and
155 will get optimized out in specialized clones of the function. */
158 ipa_fn_summary::account_size_time (int size
, sreal time
,
159 const predicate
&exec_pred
,
160 const predicate
&nonconst_pred_in
)
165 predicate nonconst_pred
;
167 if (exec_pred
== false)
170 nonconst_pred
= nonconst_pred_in
& exec_pred
;
172 if (nonconst_pred
== false)
175 /* We need to create initial empty unconitional clause, but otherwie
176 we don't need to account empty times and sizes. */
177 if (!size
&& time
== 0 && size_time_table
)
180 gcc_assert (time
>= 0);
182 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
183 if (e
->exec_predicate
== exec_pred
184 && e
->nonconst_predicate
== nonconst_pred
)
193 e
= &(*size_time_table
)[0];
194 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
196 "\t\tReached limit on number of entries, "
197 "ignoring the predicate.");
199 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
202 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
203 ((double) size
) / ipa_fn_summary::size_scale
,
204 (time
.to_double ()), found
? "" : "new ");
205 exec_pred
.dump (dump_file
, conds
, 0);
206 if (exec_pred
!= nonconst_pred
)
208 fprintf (dump_file
, " nonconst:");
209 nonconst_pred
.dump (dump_file
, conds
);
212 fprintf (dump_file
, "\n");
216 struct size_time_entry new_entry
;
217 new_entry
.size
= size
;
218 new_entry
.time
= time
;
219 new_entry
.exec_predicate
= exec_pred
;
220 new_entry
.nonconst_predicate
= nonconst_pred
;
221 vec_safe_push (size_time_table
, new_entry
);
230 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
232 static struct cgraph_edge
*
233 redirect_to_unreachable (struct cgraph_edge
*e
)
235 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
236 struct cgraph_node
*target
= cgraph_node::get_create
237 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
240 e
= e
->resolve_speculation (target
->decl
);
242 e
->make_direct (target
);
244 e
->redirect_callee (target
);
245 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
246 e
->inline_failed
= CIF_UNREACHABLE
;
248 e
->count
= profile_count::zero ();
249 es
->call_stmt_size
= 0;
250 es
->call_stmt_time
= 0;
252 callee
->remove_symbol_and_inline_clones ();
256 /* Set predicate for edge E. */
259 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
261 /* If the edge is determined to be never executed, redirect it
262 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
264 if (predicate
&& *predicate
== false
265 /* When handling speculative edges, we need to do the redirection
266 just once. Do it always on the direct edge, so we do not
267 attempt to resolve speculation while duplicating the edge. */
268 && (!e
->speculative
|| e
->callee
))
269 e
= redirect_to_unreachable (e
);
271 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
272 if (predicate
&& *predicate
!= true)
275 es
->predicate
= edge_predicate_pool
.allocate ();
276 *es
->predicate
= *predicate
;
281 edge_predicate_pool
.remove (es
->predicate
);
282 es
->predicate
= NULL
;
286 /* Set predicate for hint *P. */
289 set_hint_predicate (predicate
**p
, predicate new_predicate
)
291 if (new_predicate
== false || new_predicate
== true)
294 edge_predicate_pool
.remove (*p
);
300 *p
= edge_predicate_pool
.allocate ();
306 /* Compute what conditions may or may not hold given invormation about
307 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
308 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
309 copy when called in a given context. It is a bitmask of conditions. Bit
310 0 means that condition is known to be false, while bit 1 means that condition
311 may or may not be true. These differs - for example NOT_INLINED condition
312 is always false in the second and also builtin_constant_p tests can not use
313 the fact that parameter is indeed a constant.
315 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
316 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
317 Return clause of possible truths. When INLINE_P is true, assume that we are
320 ERROR_MARK means compile time invariant. */
323 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
325 vec
<tree
> known_vals
,
326 vec
<ipa_agg_jump_function_p
>
328 clause_t
*ret_clause
,
329 clause_t
*ret_nonspec_clause
)
331 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
332 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
333 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
337 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
342 /* We allow call stmt to have fewer arguments than the callee function
343 (especially for K&R style programs). So bound check here (we assume
344 known_aggs vector, if non-NULL, has the same length as
346 gcc_checking_assert (!known_aggs
.exists ()
347 || (known_vals
.length () == known_aggs
.length ()));
348 if (c
->operand_num
>= (int) known_vals
.length ())
350 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
351 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
357 struct ipa_agg_jump_function
*agg
;
359 if (c
->code
== predicate::changed
361 && (known_vals
[c
->operand_num
] == error_mark_node
))
364 if (known_aggs
.exists ())
366 agg
= known_aggs
[c
->operand_num
];
367 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
368 c
->offset
, c
->by_ref
);
375 val
= known_vals
[c
->operand_num
];
376 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
382 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
383 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
386 if (c
->code
== predicate::changed
)
388 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
392 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val
))) != c
->size
)
394 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
395 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
398 if (c
->code
== predicate::is_not_constant
)
400 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
404 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (c
->val
), val
);
406 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
409 if (res
&& integer_zerop (res
))
412 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
413 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
415 *ret_clause
= clause
;
416 if (ret_nonspec_clause
)
417 *ret_nonspec_clause
= nonspec_clause
;
421 /* Work out what conditions might be true at invocation of E. */
424 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
425 clause_t
*clause_ptr
,
426 clause_t
*nonspec_clause_ptr
,
427 vec
<tree
> *known_vals_ptr
,
428 vec
<ipa_polymorphic_call_context
>
430 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
432 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
433 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
434 vec
<tree
> known_vals
= vNULL
;
435 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
438 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
440 known_vals_ptr
->create (0);
441 if (known_contexts_ptr
)
442 known_contexts_ptr
->create (0);
444 if (ipa_node_params_sum
445 && !e
->call_stmt_cannot_inline_p
446 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
))
448 struct ipa_node_params
*parms_info
;
449 struct ipa_edge_args
*args
= IPA_EDGE_REF (e
);
450 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
451 int i
, count
= ipa_get_cs_argument_count (args
);
453 if (e
->caller
->global
.inlined_to
)
454 parms_info
= IPA_NODE_REF (e
->caller
->global
.inlined_to
);
456 parms_info
= IPA_NODE_REF (e
->caller
);
458 if (count
&& (info
->conds
|| known_vals_ptr
))
459 known_vals
.safe_grow_cleared (count
);
460 if (count
&& (info
->conds
|| known_aggs_ptr
))
461 known_aggs
.safe_grow_cleared (count
);
462 if (count
&& known_contexts_ptr
)
463 known_contexts_ptr
->safe_grow_cleared (count
);
465 for (i
= 0; i
< count
; i
++)
467 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
468 tree cst
= ipa_value_from_jfunc (parms_info
, jf
);
470 if (!cst
&& e
->call_stmt
471 && i
< (int)gimple_call_num_args (e
->call_stmt
))
473 cst
= gimple_call_arg (e
->call_stmt
, i
);
474 if (!is_gimple_min_invariant (cst
))
479 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
480 if (known_vals
.exists ())
483 else if (inline_p
&& !es
->param
[i
].change_prob
)
484 known_vals
[i
] = error_mark_node
;
486 if (known_contexts_ptr
)
487 (*known_contexts_ptr
)[i
] = ipa_context_from_jfunc (parms_info
, e
,
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs
[i
] = &jf
->agg
;
495 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
496 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
498 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
500 if (count
&& (info
->conds
|| known_vals_ptr
))
501 known_vals
.safe_grow_cleared (count
);
502 for (i
= 0; i
< count
; i
++)
504 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
505 if (!is_gimple_min_invariant (cst
))
512 evaluate_conditions_for_known_args (callee
, inline_p
,
513 known_vals
, known_aggs
, clause_ptr
,
517 *known_vals_ptr
= known_vals
;
519 known_vals
.release ();
522 *known_aggs_ptr
= known_aggs
;
524 known_aggs
.release ();
528 /* Allocate the function summary. */
531 ipa_fn_summary_alloc (void)
533 gcc_checking_assert (!ipa_fn_summaries
);
534 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
535 ipa_call_summaries
= new ipa_call_summary_t (symtab
, false);
538 /* We are called multiple time for given function; clear
539 data from previous run so they are not cumulated. */
542 ipa_call_summary::reset ()
544 call_stmt_size
= call_stmt_time
= 0;
545 is_return_callee_uncaptured
= false;
547 edge_predicate_pool
.remove (predicate
);
552 /* We are called multiple time for given function; clear
553 data from previous run so they are not cumulated. */
556 ipa_fn_summary::reset (struct cgraph_node
*node
)
558 struct cgraph_edge
*e
;
561 estimated_stack_size
= 0;
562 estimated_self_stack_size
= 0;
563 stack_frame_offset
= 0;
570 edge_predicate_pool
.remove (loop_iterations
);
571 loop_iterations
= NULL
;
575 edge_predicate_pool
.remove (loop_stride
);
580 edge_predicate_pool
.remove (array_index
);
584 vec_free (size_time_table
);
585 for (e
= node
->callees
; e
; e
= e
->next_callee
)
586 ipa_call_summaries
->get (e
)->reset ();
587 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
588 ipa_call_summaries
->get (e
)->reset ();
589 fp_expressions
= false;
592 /* Hook that is called by cgraph.c when a node is removed. */
595 ipa_fn_summary_t::remove (cgraph_node
*node
, ipa_fn_summary
*info
)
600 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
601 Additionally care about allocating new memory slot for updated predicate
602 and set it to NULL when it becomes true or false (and thus uninteresting).
606 remap_hint_predicate_after_duplication (predicate
**p
,
607 clause_t possible_truths
)
609 predicate new_predicate
;
614 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
615 /* We do not want to free previous predicate; it is used by node origin. */
617 set_hint_predicate (p
, new_predicate
);
621 /* Hook that is called by cgraph.c when a node is duplicated. */
623 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
626 ipa_fn_summary
*info
)
628 memcpy (info
, ipa_fn_summaries
->get (src
), sizeof (ipa_fn_summary
));
629 /* TODO: as an optimization, we may avoid copying conditions
630 that are known to be false or true. */
631 info
->conds
= vec_safe_copy (info
->conds
);
633 /* When there are any replacements in the function body, see if we can figure
634 out that something was optimized out. */
635 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
637 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
638 /* Use SRC parm info since it may not be copied yet. */
639 struct ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
640 vec
<tree
> known_vals
= vNULL
;
641 int count
= ipa_get_param_count (parms_info
);
643 clause_t possible_truths
;
644 predicate true_pred
= true;
646 int optimized_out_size
= 0;
647 bool inlined_to_p
= false;
648 struct cgraph_edge
*edge
, *next
;
650 info
->size_time_table
= 0;
651 known_vals
.safe_grow_cleared (count
);
652 for (i
= 0; i
< count
; i
++)
654 struct ipa_replace_map
*r
;
656 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
658 if (((!r
->old_tree
&& r
->parm_num
== i
)
659 || (r
->old_tree
&& r
->old_tree
== ipa_get_param (parms_info
, i
)))
660 && r
->replace_p
&& !r
->ref_p
)
662 known_vals
[i
] = r
->new_tree
;
667 evaluate_conditions_for_known_args (dst
, false,
671 /* We are going to specialize,
672 so ignore nonspec truths. */
674 known_vals
.release ();
676 info
->account_size_time (0, 0, true_pred
, true_pred
);
678 /* Remap size_time vectors.
679 Simplify the predicate by prunning out alternatives that are known
681 TODO: as on optimization, we can also eliminate conditions known
683 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
685 predicate new_exec_pred
;
686 predicate new_nonconst_pred
;
687 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
689 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
691 if (new_exec_pred
== false || new_nonconst_pred
== false)
692 optimized_out_size
+= e
->size
;
694 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
698 /* Remap edge predicates with the same simplification as above.
699 Also copy constantness arrays. */
700 for (edge
= dst
->callees
; edge
; edge
= next
)
702 predicate new_predicate
;
703 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
704 next
= edge
->next_callee
;
706 if (!edge
->inline_failed
)
710 new_predicate
= es
->predicate
->remap_after_duplication
712 if (new_predicate
== false && *es
->predicate
!= false)
713 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
714 edge_set_predicate (edge
, &new_predicate
);
717 /* Remap indirect edge predicates with the same simplificaiton as above.
718 Also copy constantness arrays. */
719 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
721 predicate new_predicate
;
722 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
723 next
= edge
->next_callee
;
725 gcc_checking_assert (edge
->inline_failed
);
728 new_predicate
= es
->predicate
->remap_after_duplication
730 if (new_predicate
== false && *es
->predicate
!= false)
731 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
732 edge_set_predicate (edge
, &new_predicate
);
734 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
736 remap_hint_predicate_after_duplication (&info
->loop_stride
,
738 remap_hint_predicate_after_duplication (&info
->array_index
,
741 /* If inliner or someone after inliner will ever start producing
742 non-trivial clones, we will get trouble with lack of information
743 about updating self sizes, because size vectors already contains
744 sizes of the calees. */
745 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
749 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
750 if (info
->loop_iterations
)
752 predicate p
= *info
->loop_iterations
;
753 info
->loop_iterations
= NULL
;
754 set_hint_predicate (&info
->loop_iterations
, p
);
756 if (info
->loop_stride
)
758 predicate p
= *info
->loop_stride
;
759 info
->loop_stride
= NULL
;
760 set_hint_predicate (&info
->loop_stride
, p
);
762 if (info
->array_index
)
764 predicate p
= *info
->array_index
;
765 info
->array_index
= NULL
;
766 set_hint_predicate (&info
->array_index
, p
);
769 if (!dst
->global
.inlined_to
)
770 ipa_update_overall_fn_summary (dst
);
774 /* Hook that is called by cgraph.c when a node is duplicated. */
777 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
778 struct cgraph_edge
*dst
,
779 struct ipa_call_summary
*srcinfo
,
780 struct ipa_call_summary
*info
)
783 info
->predicate
= NULL
;
784 edge_set_predicate (dst
, srcinfo
->predicate
);
785 info
->param
= srcinfo
->param
.copy ();
786 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
788 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
789 - eni_size_weights
.call_cost
);
790 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
791 - eni_time_weights
.call_cost
);
796 /* Keep edge cache consistent across edge removal. */
799 ipa_call_summary_t::remove (struct cgraph_edge
*,
800 struct ipa_call_summary
*sum
)
806 /* Dump edge summaries associated to NODE and recursively to all clones.
810 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
811 struct ipa_fn_summary
*info
)
813 struct cgraph_edge
*edge
;
814 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
816 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
817 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
821 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
822 " time: %2i callee size:%2i stack:%2i",
823 indent
, "", callee
->name (), callee
->order
,
825 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
826 indent
, "", es
->loop_depth
, edge
->frequency
,
827 es
->call_stmt_size
, es
->call_stmt_time
,
828 (int) ipa_fn_summaries
->get (callee
)->size
/ ipa_fn_summary::size_scale
,
829 (int) ipa_fn_summaries
->get (callee
)->estimated_stack_size
);
833 fprintf (f
, " predicate: ");
834 es
->predicate
->dump (f
, info
->conds
);
838 if (es
->param
.exists ())
839 for (i
= 0; i
< (int) es
->param
.length (); i
++)
841 int prob
= es
->param
[i
].change_prob
;
844 fprintf (f
, "%*s op%i is compile time invariant\n",
846 else if (prob
!= REG_BR_PROB_BASE
)
847 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
848 prob
* 100.0 / REG_BR_PROB_BASE
);
850 if (!edge
->inline_failed
)
852 fprintf (f
, "%*sStack frame offset %i, callee self size %i,"
855 (int) ipa_fn_summaries
->get (callee
)->stack_frame_offset
,
856 (int) ipa_fn_summaries
->get (callee
)->estimated_self_stack_size
,
857 (int) ipa_fn_summaries
->get (callee
)->estimated_stack_size
);
858 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
861 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
863 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
864 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
868 edge
->frequency
, es
->call_stmt_size
, es
->call_stmt_time
);
871 fprintf (f
, "predicate: ");
872 es
->predicate
->dump (f
, info
->conds
);
881 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
883 if (node
->definition
)
885 struct ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
888 fprintf (f
, "IPA function summary for %s/%i", node
->name (),
890 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
891 fprintf (f
, " always_inline");
893 fprintf (f
, " inlinable");
894 if (s
->contains_cilk_spawn
)
895 fprintf (f
, " contains_cilk_spawn");
896 if (s
->fp_expressions
)
897 fprintf (f
, " fp_expression");
898 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
899 fprintf (f
, " self size: %i\n", s
->self_size
);
900 fprintf (f
, " global size: %i\n", s
->size
);
901 fprintf (f
, " min size: %i\n", s
->min_size
);
902 fprintf (f
, " self stack: %i\n",
903 (int) s
->estimated_self_stack_size
);
904 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
906 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
908 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
909 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
911 fprintf (f
, " size:%f, time:%f",
912 (double) e
->size
/ ipa_fn_summary::size_scale
,
913 e
->time
.to_double ());
914 if (e
->exec_predicate
!= true)
916 fprintf (f
, ", executed if:");
917 e
->exec_predicate
.dump (f
, s
->conds
, 0);
919 if (e
->exec_predicate
!= e
->nonconst_predicate
)
921 fprintf (f
, ", nonconst if:");
922 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
926 if (s
->loop_iterations
)
928 fprintf (f
, " loop iterations:");
929 s
->loop_iterations
->dump (f
, s
->conds
);
933 fprintf (f
, " loop stride:");
934 s
->loop_stride
->dump (f
, s
->conds
);
938 fprintf (f
, " array index:");
939 s
->array_index
->dump (f
, s
->conds
);
941 fprintf (f
, " calls:\n");
942 dump_ipa_call_summary (f
, 4, node
, s
);
948 ipa_debug_fn_summary (struct cgraph_node
*node
)
950 ipa_dump_fn_summary (stderr
, node
);
954 ipa_dump_fn_summaries (FILE *f
)
956 struct cgraph_node
*node
;
958 FOR_EACH_DEFINED_FUNCTION (node
)
959 if (!node
->global
.inlined_to
)
960 ipa_dump_fn_summary (f
, node
);
963 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
964 boolean variable pointed to by DATA. */
967 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
970 bool *b
= (bool *) data
;
975 /* If OP refers to value of function parameter, return the corresponding
976 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
977 PARM_DECL) will be stored to *SIZE_P in that case too. */
980 unmodified_parm_1 (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
982 /* SSA_NAME referring to parm default def? */
983 if (TREE_CODE (op
) == SSA_NAME
984 && SSA_NAME_IS_DEFAULT_DEF (op
)
985 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
988 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
989 return SSA_NAME_VAR (op
);
991 /* Non-SSA parm reference? */
992 if (TREE_CODE (op
) == PARM_DECL
)
994 bool modified
= false;
997 ao_ref_init (&refd
, op
);
998 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), mark_modified
, &modified
,
1003 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
1010 /* If OP refers to value of function parameter, return the corresponding
1011 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1012 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1013 stored to *SIZE_P in that case too. */
1016 unmodified_parm (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
1018 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
1022 if (TREE_CODE (op
) == SSA_NAME
1023 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1024 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1025 return unmodified_parm (SSA_NAME_DEF_STMT (op
),
1026 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1031 /* If OP refers to a value of a function parameter or value loaded from an
1032 aggregate passed to a parameter (either by value or reference), return TRUE
1033 and store the number of the parameter to *INDEX_P, the access size into
1034 *SIZE_P, and information whether and how it has been loaded from an
1035 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1036 statement in which OP is used or loaded. */
1039 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1040 gimple
*stmt
, tree op
, int *index_p
,
1041 HOST_WIDE_INT
*size_p
,
1042 struct agg_position_info
*aggpos
)
1044 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
1046 gcc_checking_assert (aggpos
);
1049 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1052 aggpos
->agg_contents
= false;
1053 aggpos
->by_ref
= false;
1057 if (TREE_CODE (op
) == SSA_NAME
)
1059 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1060 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1062 stmt
= SSA_NAME_DEF_STMT (op
);
1063 op
= gimple_assign_rhs1 (stmt
);
1064 if (!REFERENCE_CLASS_P (op
))
1065 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1069 aggpos
->agg_contents
= true;
1070 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1071 stmt
, op
, index_p
, &aggpos
->offset
,
1072 size_p
, &aggpos
->by_ref
);
1075 /* See if statement might disappear after inlining.
1076 0 - means not eliminated
1077 1 - half of statements goes away
1078 2 - for sure it is eliminated.
1079 We are not terribly sophisticated, basically looking for simple abstraction
1080 penalty wrappers. */
1083 eliminated_by_inlining_prob (gimple
*stmt
)
1085 enum gimple_code code
= gimple_code (stmt
);
1086 enum tree_code rhs_code
;
1096 if (gimple_num_ops (stmt
) != 2)
1099 rhs_code
= gimple_assign_rhs_code (stmt
);
1101 /* Casts of parameters, loads from parameters passed by reference
1102 and stores to return value or parameters are often free after
1103 inlining dua to SRA and further combining.
1104 Assume that half of statements goes away. */
1105 if (CONVERT_EXPR_CODE_P (rhs_code
)
1106 || rhs_code
== VIEW_CONVERT_EXPR
1107 || rhs_code
== ADDR_EXPR
1108 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1110 tree rhs
= gimple_assign_rhs1 (stmt
);
1111 tree lhs
= gimple_assign_lhs (stmt
);
1112 tree inner_rhs
= get_base_address (rhs
);
1113 tree inner_lhs
= get_base_address (lhs
);
1114 bool rhs_free
= false;
1115 bool lhs_free
= false;
1122 /* Reads of parameter are expected to be free. */
1123 if (unmodified_parm (stmt
, inner_rhs
, NULL
))
1125 /* Match expressions of form &this->field. Those will most likely
1126 combine with something upstream after inlining. */
1127 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1129 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1130 if (TREE_CODE (op
) == PARM_DECL
)
1132 else if (TREE_CODE (op
) == MEM_REF
1133 && unmodified_parm (stmt
, TREE_OPERAND (op
, 0), NULL
))
1137 /* When parameter is not SSA register because its address is taken
1138 and it is just copied into one, the statement will be completely
1139 free after inlining (we will copy propagate backward). */
1140 if (rhs_free
&& is_gimple_reg (lhs
))
1143 /* Reads of parameters passed by reference
1144 expected to be free (i.e. optimized out after inlining). */
1145 if (TREE_CODE (inner_rhs
) == MEM_REF
1146 && unmodified_parm (stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1149 /* Copying parameter passed by reference into gimple register is
1150 probably also going to copy propagate, but we can't be quite
1152 if (rhs_free
&& is_gimple_reg (lhs
))
1155 /* Writes to parameters, parameters passed by value and return value
1156 (either dirrectly or passed via invisible reference) are free.
1158 TODO: We ought to handle testcase like
1159 struct a {int a,b;};
1161 retrurnsturct (void)
1167 This translate into:
1182 For that we either need to copy ipa-split logic detecting writes
1184 if (TREE_CODE (inner_lhs
) == PARM_DECL
1185 || TREE_CODE (inner_lhs
) == RESULT_DECL
1186 || (TREE_CODE (inner_lhs
) == MEM_REF
1187 && (unmodified_parm (stmt
, TREE_OPERAND (inner_lhs
, 0), NULL
)
1188 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1189 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1190 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1192 0))) == RESULT_DECL
))))
1195 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1197 if (lhs_free
&& rhs_free
)
1207 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1208 predicates to the CFG edges. */
1211 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1212 struct ipa_fn_summary
*summary
,
1219 struct agg_position_info aggpos
;
1220 enum tree_code code
, inverted_code
;
1226 last
= last_stmt (bb
);
1227 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1229 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1231 op
= gimple_cond_lhs (last
);
1232 /* TODO: handle conditionals like
1235 if (unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1237 code
= gimple_cond_code (last
);
1238 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1240 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1242 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1243 ? code
: inverted_code
);
1244 /* invert_tree_comparison will return ERROR_MARK on FP
1245 comparsions that are not EQ/NE instead of returning proper
1246 unordered one. Be sure it is not confused with NON_CONSTANT. */
1247 if (this_code
!= ERROR_MARK
)
1250 = add_condition (summary
, index
, size
, &aggpos
, this_code
,
1251 unshare_expr_without_location
1252 (gimple_cond_rhs (last
)));
1253 e
->aux
= edge_predicate_pool
.allocate ();
1254 *(predicate
*) e
->aux
= p
;
1259 if (TREE_CODE (op
) != SSA_NAME
)
1262 if (builtin_constant_p (op))
1266 Here we can predicate nonconstant_code. We can't
1267 really handle constant_code since we have no predicate
1268 for this and also the constant code is not known to be
1269 optimized away when inliner doen't see operand is constant.
1270 Other optimizers might think otherwise. */
1271 if (gimple_cond_code (last
) != NE_EXPR
1272 || !integer_zerop (gimple_cond_rhs (last
)))
1274 set_stmt
= SSA_NAME_DEF_STMT (op
);
1275 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1276 || gimple_call_num_args (set_stmt
) != 1)
1278 op2
= gimple_call_arg (set_stmt
, 0);
1279 if (!unmodified_parm_or_parm_agg_item (fbi
, set_stmt
, op2
, &index
, &size
,
1282 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1284 predicate p
= add_condition (summary
, index
, size
, &aggpos
,
1285 predicate::is_not_constant
, NULL_TREE
);
1286 e
->aux
= edge_predicate_pool
.allocate ();
1287 *(predicate
*) e
->aux
= p
;
1292 /* If BB ends by a switch we can turn into predicates, attach corresponding
1293 predicates to the CFG edges. */
1296 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1297 struct ipa_fn_summary
*summary
,
1304 struct agg_position_info aggpos
;
1310 lastg
= last_stmt (bb
);
1311 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1313 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1314 op
= gimple_switch_index (last
);
1315 if (!unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1318 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1320 e
->aux
= edge_predicate_pool
.allocate ();
1321 *(predicate
*) e
->aux
= false;
1323 n
= gimple_switch_num_labels (last
);
1324 for (case_idx
= 0; case_idx
< n
; ++case_idx
)
1326 tree cl
= gimple_switch_label (last
, case_idx
);
1330 e
= find_edge (bb
, label_to_block (CASE_LABEL (cl
)));
1331 min
= CASE_LOW (cl
);
1332 max
= CASE_HIGH (cl
);
1334 /* For default we might want to construct predicate that none
1335 of cases is met, but it is bit hard to do not having negations
1336 of conditionals handy. */
1340 p
= add_condition (summary
, index
, size
, &aggpos
, EQ_EXPR
,
1341 unshare_expr_without_location (min
));
1345 p1
= add_condition (summary
, index
, size
, &aggpos
, GE_EXPR
,
1346 unshare_expr_without_location (min
));
1347 p2
= add_condition (summary
, index
, size
, &aggpos
, LE_EXPR
,
1348 unshare_expr_without_location (max
));
1351 *(struct predicate
*) e
->aux
1352 = p
.or_with (summary
->conds
, *(struct predicate
*) e
->aux
);
1357 /* For each BB in NODE attach to its AUX pointer predicate under
1358 which it is executable. */
1361 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1362 struct cgraph_node
*node
,
1363 struct ipa_fn_summary
*summary
)
1365 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1369 FOR_EACH_BB_FN (bb
, my_function
)
1371 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1372 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1375 /* Entry block is always executable. */
1376 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1377 = edge_predicate_pool
.allocate ();
1378 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1380 /* A simple dataflow propagation of predicates forward in the CFG.
1381 TODO: work in reverse postorder. */
1385 FOR_EACH_BB_FN (bb
, my_function
)
1387 predicate p
= false;
1390 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1394 predicate this_bb_predicate
1395 = *(predicate
*) e
->src
->aux
;
1397 this_bb_predicate
&= (*(struct predicate
*) e
->aux
);
1398 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1404 gcc_checking_assert (!bb
->aux
);
1410 bb
->aux
= edge_predicate_pool
.allocate ();
1411 *((predicate
*) bb
->aux
) = p
;
1413 else if (p
!= *(predicate
*) bb
->aux
)
1415 /* This OR operation is needed to ensure monotonous data flow
1416 in the case we hit the limit on number of clauses and the
1417 and/or operations above give approximate answers. */
1418 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1419 if (p
!= *(predicate
*) bb
->aux
)
1422 *((predicate
*) bb
->aux
) = p
;
1431 /* Return predicate specifying when the STMT might have result that is not
1432 a compile time constant. */
1435 will_be_nonconstant_expr_predicate (struct ipa_node_params
*info
,
1436 struct ipa_fn_summary
*summary
,
1438 vec
<predicate
> nonconstant_names
)
1444 while (UNARY_CLASS_P (expr
))
1445 expr
= TREE_OPERAND (expr
, 0);
1447 parm
= unmodified_parm (NULL
, expr
, &size
);
1448 if (parm
&& (index
= ipa_get_param_decl_index (info
, parm
)) >= 0)
1449 return add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1451 if (is_gimple_min_invariant (expr
))
1453 if (TREE_CODE (expr
) == SSA_NAME
)
1454 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1455 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1457 predicate p1
= will_be_nonconstant_expr_predicate
1458 (info
, summary
, TREE_OPERAND (expr
, 0),
1464 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1465 TREE_OPERAND (expr
, 1),
1467 return p1
.or_with (summary
->conds
, p2
);
1469 else if (TREE_CODE (expr
) == COND_EXPR
)
1471 predicate p1
= will_be_nonconstant_expr_predicate
1472 (info
, summary
, TREE_OPERAND (expr
, 0),
1478 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1479 TREE_OPERAND (expr
, 1),
1483 p1
= p1
.or_with (summary
->conds
, p2
);
1484 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1485 TREE_OPERAND (expr
, 2),
1487 return p2
.or_with (summary
->conds
, p1
);
1498 /* Return predicate specifying when the STMT might have result that is not
1499 a compile time constant. */
1502 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1503 struct ipa_fn_summary
*summary
,
1505 vec
<predicate
> nonconstant_names
)
1510 predicate op_non_const
;
1514 struct agg_position_info aggpos
;
1516 /* What statments might be optimized away
1517 when their arguments are constant. */
1518 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1519 && gimple_code (stmt
) != GIMPLE_COND
1520 && gimple_code (stmt
) != GIMPLE_SWITCH
1521 && (gimple_code (stmt
) != GIMPLE_CALL
1522 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1525 /* Stores will stay anyway. */
1526 if (gimple_store_p (stmt
))
1529 is_load
= gimple_assign_load_p (stmt
);
1531 /* Loads can be optimized when the value is known. */
1535 gcc_assert (gimple_assign_single_p (stmt
));
1536 op
= gimple_assign_rhs1 (stmt
);
1537 if (!unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, &base_index
, &size
,
1544 /* See if we understand all operands before we start
1545 adding conditionals. */
1546 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1548 tree parm
= unmodified_parm (stmt
, use
, NULL
);
1549 /* For arguments we can build a condition. */
1550 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1552 if (TREE_CODE (use
) != SSA_NAME
)
1554 /* If we know when operand is constant,
1555 we still can say something useful. */
1556 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1563 add_condition (summary
, base_index
, size
, &aggpos
, predicate::changed
,
1566 op_non_const
= false;
1567 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1570 tree parm
= unmodified_parm (stmt
, use
, &size
);
1573 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1575 if (index
!= base_index
)
1576 p
= add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1582 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1583 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1585 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1586 && gimple_op (stmt
, 0)
1587 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1588 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1590 return op_non_const
;
1593 struct record_modified_bb_info
1599 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1600 probability how often it changes between USE_BB.
1601 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
1602 is in different loop nest, we can do better.
1603 This is all just estimate. In theory we look for minimal cut separating
1604 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1608 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1610 struct loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1611 if (l
&& l
->header
->frequency
< init_bb
->frequency
)
1616 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1617 set except for info->stmt. */
1620 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1622 struct record_modified_bb_info
*info
=
1623 (struct record_modified_bb_info
*) data
;
1624 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1626 bitmap_set_bit (info
->bb_set
,
1627 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1628 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1630 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1631 gimple_bb (info
->stmt
))->index
);
1635 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1636 will change since last invocation of STMT.
1638 Value 0 is reserved for compile time invariants.
1639 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1640 ought to be REG_BR_PROB_BASE / estimated_iters. */
1643 param_change_prob (gimple
*stmt
, int i
)
1645 tree op
= gimple_call_arg (stmt
, i
);
1646 basic_block bb
= gimple_bb (stmt
);
1648 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1649 op
= TREE_OPERAND (op
, 0);
1651 tree base
= get_base_address (op
);
1653 /* Global invariants never change. */
1654 if (is_gimple_min_invariant (base
))
1657 /* We would have to do non-trivial analysis to really work out what
1658 is the probability of value to change (i.e. when init statement
1659 is in a sibling loop of the call).
1661 We do an conservative estimate: when call is executed N times more often
1662 than the statement defining value, we take the frequency 1/N. */
1663 if (TREE_CODE (base
) == SSA_NAME
)
1668 return REG_BR_PROB_BASE
;
1670 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1671 init_freq
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->frequency
;
1673 init_freq
= get_minimal_bb
1674 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1675 gimple_bb (stmt
))->frequency
;
1679 if (init_freq
< bb
->frequency
)
1680 return MAX (GCOV_COMPUTE_SCALE (init_freq
, bb
->frequency
), 1);
1682 return REG_BR_PROB_BASE
;
1688 struct record_modified_bb_info info
;
1691 tree init
= ctor_for_folding (base
);
1693 if (init
!= error_mark_node
)
1696 return REG_BR_PROB_BASE
;
1697 ao_ref_init (&refd
, op
);
1699 info
.bb_set
= BITMAP_ALLOC (NULL
);
1700 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1702 if (bitmap_bit_p (info
.bb_set
, bb
->index
))
1704 BITMAP_FREE (info
.bb_set
);
1705 return REG_BR_PROB_BASE
;
1708 /* Assume that every memory is initialized at entry.
1709 TODO: Can we easilly determine if value is always defined
1710 and thus we may skip entry block? */
1711 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->frequency
)
1712 max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->frequency
;
1716 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
1717 max
= MIN (max
, BASIC_BLOCK_FOR_FN (cfun
, index
)->frequency
);
1719 BITMAP_FREE (info
.bb_set
);
1720 if (max
< bb
->frequency
)
1721 return MAX (GCOV_COMPUTE_SCALE (max
, bb
->frequency
), 1);
1723 return REG_BR_PROB_BASE
;
1727 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1728 sub-graph and if the predicate the condition depends on is known. If so,
1729 return true and store the pointer the predicate in *P. */
1732 phi_result_unknown_predicate (struct ipa_node_params
*info
,
1733 ipa_fn_summary
*summary
, basic_block bb
,
1735 vec
<predicate
> nonconstant_names
)
1739 basic_block first_bb
= NULL
;
1742 if (single_pred_p (bb
))
1748 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1750 if (single_succ_p (e
->src
))
1752 if (!single_pred_p (e
->src
))
1755 first_bb
= single_pred (e
->src
);
1756 else if (single_pred (e
->src
) != first_bb
)
1763 else if (e
->src
!= first_bb
)
1771 stmt
= last_stmt (first_bb
);
1773 || gimple_code (stmt
) != GIMPLE_COND
1774 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
1777 *p
= will_be_nonconstant_expr_predicate (info
, summary
,
1778 gimple_cond_lhs (stmt
),
1786 /* Given a PHI statement in a function described by inline properties SUMMARY
1787 and *P being the predicate describing whether the selected PHI argument is
1788 known, store a predicate for the result of the PHI statement into
1789 NONCONSTANT_NAMES, if possible. */
1792 predicate_for_phi_result (struct ipa_fn_summary
*summary
, gphi
*phi
,
1794 vec
<predicate
> nonconstant_names
)
1798 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
1800 tree arg
= gimple_phi_arg (phi
, i
)->def
;
1801 if (!is_gimple_min_invariant (arg
))
1803 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
1804 *p
= p
->or_with (summary
->conds
,
1805 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
1811 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1813 fprintf (dump_file
, "\t\tphi predicate: ");
1814 p
->dump (dump_file
, summary
->conds
);
1816 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
1819 /* Return predicate specifying when array index in access OP becomes non-constant. */
1822 array_index_predicate (ipa_fn_summary
*info
,
1823 vec
< predicate
> nonconstant_names
, tree op
)
1825 predicate p
= false;
1826 while (handled_component_p (op
))
1828 if (TREE_CODE (op
) == ARRAY_REF
|| TREE_CODE (op
) == ARRAY_RANGE_REF
)
1830 if (TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
1831 p
= p
.or_with (info
->conds
,
1832 nonconstant_names
[SSA_NAME_VERSION
1833 (TREE_OPERAND (op
, 1))]);
1835 op
= TREE_OPERAND (op
, 0);
1840 /* For a typical usage of __builtin_expect (a<b, 1), we
1841 may introduce an extra relation stmt:
1842 With the builtin, we have
1845 t3 = __builtin_expect (t2, 1);
1848 Without the builtin, we have
1851 This affects the size/time estimation and may have
1852 an impact on the earlier inlining.
1853 Here find this pattern and fix it up later. */
1856 find_foldable_builtin_expect (basic_block bb
)
1858 gimple_stmt_iterator bsi
;
1860 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1862 gimple
*stmt
= gsi_stmt (bsi
);
1863 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
1864 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
1866 tree var
= gimple_call_lhs (stmt
);
1867 tree arg
= gimple_call_arg (stmt
, 0);
1868 use_operand_p use_p
;
1875 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
1877 while (TREE_CODE (arg
) == SSA_NAME
)
1879 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
1880 if (!is_gimple_assign (stmt_tmp
))
1882 switch (gimple_assign_rhs_code (stmt_tmp
))
1901 arg
= gimple_assign_rhs1 (stmt_tmp
);
1904 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
1905 && gimple_code (use_stmt
) == GIMPLE_COND
)
1912 /* Return true when the basic blocks contains only clobbers followed by RESX.
1913 Such BBs are kept around to make removal of dead stores possible with
1914 presence of EH and will be optimized out by optimize_clobbers later in the
1917 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1918 that can be clobber only, too.. When it is false, the RESX is not necessary
1919 on the end of basic block. */
1922 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
1924 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1930 if (gsi_end_p (gsi
))
1932 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
1936 else if (!single_succ_p (bb
))
1939 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
1941 gimple
*stmt
= gsi_stmt (gsi
);
1942 if (is_gimple_debug (stmt
))
1944 if (gimple_clobber_p (stmt
))
1946 if (gimple_code (stmt
) == GIMPLE_LABEL
)
1951 /* See if all predecestors are either throws or clobber only BBs. */
1952 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1953 if (!(e
->flags
& EDGE_EH
)
1954 && !clobber_only_eh_bb_p (e
->src
, false))
1960 /* Return true if STMT compute a floating point expression that may be affected
1961 by -ffast-math and similar flags. */
1964 fp_expression_p (gimple
*stmt
)
1969 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
1970 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
1975 /* Analyze function body for NODE.
1976 EARLY indicates run from early optimization pipeline. */
1979 analyze_function_body (struct cgraph_node
*node
, bool early
)
1982 /* Estimate static overhead for function prologue/epilogue and alignment. */
1984 /* Benefits are scaled by probability of elimination that is in range
1987 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1989 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
1990 predicate bb_predicate
;
1991 struct ipa_func_body_info fbi
;
1992 vec
<predicate
> nonconstant_names
= vNULL
;
1995 predicate array_index
= true;
1996 gimple
*fix_builtin_expect_stmt
;
1998 gcc_assert (my_function
&& my_function
->cfg
);
1999 gcc_assert (cfun
== my_function
);
2001 memset(&fbi
, 0, sizeof(fbi
));
2003 info
->size_time_table
= NULL
;
2005 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2006 so we can produce proper inline hints.
2008 When optimizing and analyzing for early inliner, initialize node params
2009 so we can produce correct BB predicates. */
2011 if (opt_for_fn (node
->decl
, optimize
))
2013 calculate_dominance_info (CDI_DOMINATORS
);
2015 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2018 ipa_check_create_node_params ();
2019 ipa_initialize_node_params (node
);
2022 if (ipa_node_params_sum
)
2025 fbi
.info
= IPA_NODE_REF (node
);
2026 fbi
.bb_infos
= vNULL
;
2027 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2028 fbi
.param_count
= count_formal_params(node
->decl
);
2029 nonconstant_names
.safe_grow_cleared
2030 (SSANAMES (my_function
)->length ());
2035 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2038 /* When we run into maximal number of entries, we assign everything to the
2039 constant truth case. Be sure to have it in list. */
2040 bb_predicate
= true;
2041 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2043 bb_predicate
= predicate::not_inlined ();
2044 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, bb_predicate
,
2048 compute_bb_predicates (&fbi
, node
, info
);
2049 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2050 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2051 for (n
= 0; n
< nblocks
; n
++)
2053 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2054 freq
= compute_call_stmt_bb_frequency (node
->decl
, bb
);
2055 if (clobber_only_eh_bb_p (bb
))
2057 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2058 fprintf (dump_file
, "\n Ignoring BB %i;"
2059 " it will be optimized away by cleanup_clobbers\n",
2064 /* TODO: Obviously predicates can be propagated down across CFG. */
2068 bb_predicate
= *(predicate
*) bb
->aux
;
2070 bb_predicate
= false;
2073 bb_predicate
= true;
2075 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2077 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2078 bb_predicate
.dump (dump_file
, info
->conds
);
2081 if (fbi
.info
&& nonconstant_names
.exists ())
2083 predicate phi_predicate
;
2084 bool first_phi
= true;
2086 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2090 && !phi_result_unknown_predicate (fbi
.info
, info
, bb
,
2095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2097 fprintf (dump_file
, " ");
2098 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2100 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2105 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2107 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
2110 gimple
*stmt
= gsi_stmt (bsi
);
2111 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2112 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2114 predicate will_be_nonconstant
;
2116 /* This relation stmt should be folded after we remove
2117 buildin_expect call. Adjust the cost here. */
2118 if (stmt
== fix_builtin_expect_stmt
)
2124 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2126 fprintf (dump_file
, " ");
2127 print_gimple_stmt (dump_file
, stmt
, 0);
2128 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2129 ((double) freq
) / CGRAPH_FREQ_BASE
, this_size
,
2133 if (gimple_assign_load_p (stmt
) && nonconstant_names
.exists ())
2135 predicate this_array_index
;
2137 array_index_predicate (info
, nonconstant_names
,
2138 gimple_assign_rhs1 (stmt
));
2139 if (this_array_index
!= false)
2140 array_index
&= this_array_index
;
2142 if (gimple_store_p (stmt
) && nonconstant_names
.exists ())
2144 predicate this_array_index
;
2146 array_index_predicate (info
, nonconstant_names
,
2147 gimple_get_lhs (stmt
));
2148 if (this_array_index
!= false)
2149 array_index
&= this_array_index
;
2153 if (is_gimple_call (stmt
)
2154 && !gimple_call_internal_p (stmt
))
2156 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2157 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2159 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2160 resolved as constant. We however don't want to optimize
2161 out the cgraph edges. */
2162 if (nonconstant_names
.exists ()
2163 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2164 && gimple_call_lhs (stmt
)
2165 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2167 predicate false_p
= false;
2168 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2171 if (ipa_node_params_sum
)
2173 int count
= gimple_call_num_args (stmt
);
2177 es
->param
.safe_grow_cleared (count
);
2178 for (i
= 0; i
< count
; i
++)
2180 int prob
= param_change_prob (stmt
, i
);
2181 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2182 es
->param
[i
].change_prob
= prob
;
2186 es
->call_stmt_size
= this_size
;
2187 es
->call_stmt_time
= this_time
;
2188 es
->loop_depth
= bb_loop_depth (bb
);
2189 edge_set_predicate (edge
, &bb_predicate
);
2192 /* TODO: When conditional jump or swithc is known to be constant, but
2193 we did not translate it into the predicates, we really can account
2194 just maximum of the possible paths. */
2197 = will_be_nonconstant_predicate (&fbi
, info
,
2198 stmt
, nonconstant_names
);
2200 will_be_nonconstant
= true;
2201 if (this_time
|| this_size
)
2205 prob
= eliminated_by_inlining_prob (stmt
);
2206 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2208 "\t\t50%% will be eliminated by inlining\n");
2209 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2210 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2212 struct predicate p
= bb_predicate
& will_be_nonconstant
;
2214 /* We can ignore statement when we proved it is never going
2215 to happen, but we can not do that for call statements
2216 because edges are accounted specially. */
2218 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2224 /* We account everything but the calls. Calls have their own
2225 size/time info attached to cgraph edges. This is necessary
2226 in order to make the cost disappear after inlining. */
2227 if (!is_gimple_call (stmt
))
2231 predicate ip
= bb_predicate
& predicate::not_inlined ();
2232 info
->account_size_time (this_size
* prob
,
2233 (sreal
)(this_time
* prob
)
2234 / (CGRAPH_FREQ_BASE
* 2), ip
,
2238 info
->account_size_time (this_size
* (2 - prob
),
2239 (sreal
)(this_time
* (2 - prob
))
2240 / (CGRAPH_FREQ_BASE
* 2),
2245 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2247 info
->fp_expressions
= true;
2249 fprintf (dump_file
, " fp_expression set\n");
2252 gcc_assert (time
>= 0);
2253 gcc_assert (size
>= 0);
2257 set_hint_predicate (&ipa_fn_summaries
->get (node
)->array_index
, array_index
);
2258 time
= time
/ CGRAPH_FREQ_BASE
;
2261 if (nonconstant_names
.exists () && !early
)
2264 predicate loop_iterations
= true;
2265 predicate loop_stride
= true;
2267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2268 flow_loops_dump (dump_file
, NULL
, 0);
2270 FOR_EACH_LOOP (loop
, 0)
2275 struct tree_niter_desc niter_desc
;
2276 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2278 exits
= get_loop_exit_edges (loop
);
2279 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2280 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2281 && !is_gimple_min_invariant (niter_desc
.niter
))
2283 predicate will_be_nonconstant
2284 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2287 if (will_be_nonconstant
!= true)
2288 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2289 if (will_be_nonconstant
!= true
2290 && will_be_nonconstant
!= false)
2291 /* This is slightly inprecise. We may want to represent each
2292 loop with independent predicate. */
2293 loop_iterations
&= will_be_nonconstant
;
2298 /* To avoid quadratic behavior we analyze stride predicates only
2299 with respect to the containing loop. Thus we simply iterate
2300 over all defs in the outermost loop body. */
2301 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2302 loop
!= NULL
; loop
= loop
->next
)
2304 basic_block
*body
= get_loop_body (loop
);
2305 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2307 gimple_stmt_iterator gsi
;
2308 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2309 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2312 gimple
*stmt
= gsi_stmt (gsi
);
2314 if (!is_gimple_assign (stmt
))
2317 tree def
= gimple_assign_lhs (stmt
);
2318 if (TREE_CODE (def
) != SSA_NAME
)
2322 if (!simple_iv (loop_containing_stmt (stmt
),
2323 loop_containing_stmt (stmt
),
2325 || is_gimple_min_invariant (iv
.step
))
2328 predicate will_be_nonconstant
2329 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2332 if (will_be_nonconstant
!= true)
2333 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2334 if (will_be_nonconstant
!= true
2335 && will_be_nonconstant
!= false)
2336 /* This is slightly inprecise. We may want to represent
2337 each loop with independent predicate. */
2338 loop_stride
= loop_stride
& will_be_nonconstant
;
2343 set_hint_predicate (&ipa_fn_summaries
->get (node
)->loop_iterations
,
2345 set_hint_predicate (&ipa_fn_summaries
->get (node
)->loop_stride
,
2349 FOR_ALL_BB_FN (bb
, my_function
)
2355 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2357 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2360 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2364 ipa_fn_summaries
->get (node
)->time
= time
;
2365 ipa_fn_summaries
->get (node
)->self_size
= size
;
2366 nonconstant_names
.release ();
2367 ipa_release_body_info (&fbi
);
2368 if (opt_for_fn (node
->decl
, optimize
))
2371 loop_optimizer_finalize ();
2372 else if (!ipa_edge_args_sum
)
2373 ipa_free_all_node_params ();
2374 free_dominance_info (CDI_DOMINATORS
);
2378 fprintf (dump_file
, "\n");
2379 ipa_dump_fn_summary (dump_file
, node
);
2384 /* Compute function summary.
2385 EARLY is true when we compute parameters during early opts. */
2388 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2390 HOST_WIDE_INT self_stack_size
;
2391 struct cgraph_edge
*e
;
2392 struct ipa_fn_summary
*info
;
2394 gcc_assert (!node
->global
.inlined_to
);
2396 if (!ipa_fn_summaries
)
2397 ipa_fn_summary_alloc ();
2399 info
= ipa_fn_summaries
->get (node
);
2402 /* Estimate the stack size for the function if we're optimizing. */
2403 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2404 ? estimated_stack_frame_size (node
) : 0;
2405 info
->estimated_self_stack_size
= self_stack_size
;
2406 info
->estimated_stack_size
= self_stack_size
;
2407 info
->stack_frame_offset
= 0;
2409 if (node
->thunk
.thunk_p
)
2411 struct ipa_call_summary
*es
= ipa_call_summaries
->get (node
->callees
);
2414 node
->local
.can_change_signature
= false;
2415 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2416 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2417 info
->account_size_time (ipa_fn_summary::size_scale
* 2, 2, t
, t
);
2418 t
= predicate::not_inlined ();
2419 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2420 ipa_update_overall_fn_summary (node
);
2421 info
->self_size
= info
->size
;
2422 /* We can not inline instrumentation clones. */
2423 if (node
->thunk
.add_pointer_bounds_args
)
2425 info
->inlinable
= false;
2426 node
->callees
->inline_failed
= CIF_CHKP
;
2429 info
->inlinable
= true;
2433 /* Even is_gimple_min_invariant rely on current_function_decl. */
2434 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2436 /* Can this function be inlined at all? */
2437 if (!opt_for_fn (node
->decl
, optimize
)
2438 && !lookup_attribute ("always_inline",
2439 DECL_ATTRIBUTES (node
->decl
)))
2440 info
->inlinable
= false;
2442 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2444 info
->contains_cilk_spawn
= fn_contains_cilk_spawn_p (cfun
);
2446 /* Type attributes can use parameter indices to describe them. */
2447 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
)))
2448 node
->local
.can_change_signature
= false;
2451 /* Otherwise, inlinable functions always can change signature. */
2452 if (info
->inlinable
)
2453 node
->local
.can_change_signature
= true;
2456 /* Functions calling builtin_apply can not change signature. */
2457 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2459 tree
cdecl = e
->callee
->decl
;
2460 if (DECL_BUILT_IN (cdecl)
2461 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2462 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2463 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START
))
2466 node
->local
.can_change_signature
= !e
;
2469 /* Functions called by instrumentation thunk can't change signature
2470 because instrumentation thunk modification is not supported. */
2471 if (node
->local
.can_change_signature
)
2472 for (e
= node
->callers
; e
; e
= e
->next_caller
)
2473 if (e
->caller
->thunk
.thunk_p
2474 && e
->caller
->thunk
.add_pointer_bounds_args
)
2476 node
->local
.can_change_signature
= false;
2479 analyze_function_body (node
, early
);
2482 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2483 if (e
->callee
->comdat_local_p ())
2485 node
->calls_comdat_local
= (e
!= NULL
);
2487 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2488 info
->size
= info
->self_size
;
2489 info
->stack_frame_offset
= 0;
2490 info
->estimated_stack_size
= info
->estimated_self_stack_size
;
2492 /* Code above should compute exactly the same result as
2493 ipa_update_overall_fn_summary but because computation happens in
2494 different order the roundoff errors result in slight changes. */
2495 ipa_update_overall_fn_summary (node
);
2496 gcc_assert (info
->size
== info
->self_size
);
2500 /* Compute parameters of functions used by inliner using
2501 current_function_decl. */
2504 compute_fn_summary_for_current (void)
2506 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2510 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2511 KNOWN_CONTEXTS and KNOWN_AGGS. */
2514 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2515 int *size
, int *time
,
2516 vec
<tree
> known_vals
,
2517 vec
<ipa_polymorphic_call_context
> known_contexts
,
2518 vec
<ipa_agg_jump_function_p
> known_aggs
)
2521 struct cgraph_node
*callee
;
2522 struct ipa_fn_summary
*isummary
;
2523 enum availability avail
;
2526 if (!known_vals
.exists () && !known_contexts
.exists ())
2528 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2531 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2532 known_aggs
, &speculative
);
2533 if (!target
|| speculative
)
2536 /* Account for difference in cost between indirect and direct calls. */
2537 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2538 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2539 gcc_checking_assert (*time
>= 0);
2540 gcc_checking_assert (*size
>= 0);
2542 callee
= cgraph_node::get (target
);
2543 if (!callee
|| !callee
->definition
)
2545 callee
= callee
->function_symbol (&avail
);
2546 if (avail
< AVAIL_AVAILABLE
)
2548 isummary
= ipa_fn_summaries
->get (callee
);
2549 return isummary
->inlinable
;
2552 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2553 handle edge E with probability PROB.
2554 Set HINTS if edge may be devirtualized.
2555 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2559 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2562 vec
<tree
> known_vals
,
2563 vec
<ipa_polymorphic_call_context
> known_contexts
,
2564 vec
<ipa_agg_jump_function_p
> known_aggs
,
2567 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2568 int call_size
= es
->call_stmt_size
;
2569 int call_time
= es
->call_stmt_time
;
2572 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2573 known_vals
, known_contexts
, known_aggs
)
2574 && hints
&& e
->maybe_hot_p ())
2575 *hints
|= INLINE_HINT_indirect_call
;
2576 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2579 *min_size
+= cur_size
;
2580 if (prob
== REG_BR_PROB_BASE
)
2581 *time
+= ((sreal
)(call_time
* e
->frequency
)) / CGRAPH_FREQ_BASE
;
2583 *time
+= ((sreal
)call_time
) * (prob
* e
->frequency
)
2584 / (CGRAPH_FREQ_BASE
* REG_BR_PROB_BASE
);
2589 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2590 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2591 describe context of the call site. */
2594 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2595 int *min_size
, sreal
*time
,
2597 clause_t possible_truths
,
2598 vec
<tree
> known_vals
,
2599 vec
<ipa_polymorphic_call_context
> known_contexts
,
2600 vec
<ipa_agg_jump_function_p
> known_aggs
)
2602 struct cgraph_edge
*e
;
2603 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2605 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2607 /* Do not care about zero sized builtins. */
2608 if (e
->inline_failed
&& !es
->call_stmt_size
)
2610 gcc_checking_assert (!es
->call_stmt_time
);
2614 || es
->predicate
->evaluate (possible_truths
))
2616 if (e
->inline_failed
)
2618 /* Predicates of calls shall not use NOT_CHANGED codes,
2619 sowe do not need to compute probabilities. */
2620 estimate_edge_size_and_time (e
, size
,
2621 es
->predicate
? NULL
: min_size
,
2622 time
, REG_BR_PROB_BASE
,
2623 known_vals
, known_contexts
,
2627 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2630 known_vals
, known_contexts
,
2634 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2636 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2638 || es
->predicate
->evaluate (possible_truths
))
2639 estimate_edge_size_and_time (e
, size
,
2640 es
->predicate
? NULL
: min_size
,
2641 time
, REG_BR_PROB_BASE
,
2642 known_vals
, known_contexts
, known_aggs
,
2648 /* Estimate size and time needed to execute NODE assuming
2649 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2650 information about NODE's arguments. If non-NULL use also probability
2651 information present in INLINE_PARAM_SUMMARY vector.
2652 Additionally detemine hints determined by the context. Finally compute
2653 minimal size needed for the call that is independent on the call context and
2654 can be used for fast estimates. Return the values in RET_SIZE,
2655 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2658 estimate_node_size_and_time (struct cgraph_node
*node
,
2659 clause_t possible_truths
,
2660 clause_t nonspec_possible_truths
,
2661 vec
<tree
> known_vals
,
2662 vec
<ipa_polymorphic_call_context
> known_contexts
,
2663 vec
<ipa_agg_jump_function_p
> known_aggs
,
2664 int *ret_size
, int *ret_min_size
,
2666 sreal
*ret_nonspecialized_time
,
2667 ipa_hints
*ret_hints
,
2668 vec
<inline_param_summary
>
2669 inline_param_summary
)
2671 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
2676 ipa_hints hints
= 0;
2679 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2682 fprintf (dump_file
, " Estimating body: %s/%i\n"
2683 " Known to be false: ", node
->name (),
2686 for (i
= predicate::not_inlined_condition
;
2687 i
< (predicate::first_dynamic_condition
2688 + (int) vec_safe_length (info
->conds
)); i
++)
2689 if (!(possible_truths
& (1 << i
)))
2692 fprintf (dump_file
, ", ");
2694 dump_condition (dump_file
, info
->conds
, i
);
2698 estimate_calls_size_and_time (node
, &size
, &min_size
, &time
, &hints
, possible_truths
,
2699 known_vals
, known_contexts
, known_aggs
);
2700 sreal nonspecialized_time
= time
;
2702 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
2704 bool exec
= e
->exec_predicate
.evaluate (nonspec_possible_truths
);
2706 /* Because predicates are conservative, it can happen that nonconst is 1
2710 bool nonconst
= e
->nonconst_predicate
.evaluate (possible_truths
);
2712 gcc_checking_assert (e
->time
>= 0);
2713 gcc_checking_assert (time
>= 0);
2715 /* We compute specialized size only because size of nonspecialized
2716 copy is context independent.
2718 The difference between nonspecialized execution and specialized is
2719 that nonspecialized is not going to have optimized out computations
2720 known to be constant in a specialized setting. */
2723 nonspecialized_time
+= e
->time
;
2726 else if (!inline_param_summary
.exists ())
2733 int prob
= e
->nonconst_predicate
.probability
2734 (info
->conds
, possible_truths
,
2735 inline_param_summary
);
2736 gcc_checking_assert (prob
>= 0);
2737 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
2738 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
2740 gcc_checking_assert (time
>= 0);
2743 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
2744 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
2745 min_size
= (*info
->size_time_table
)[0].size
;
2746 gcc_checking_assert (size
>= 0);
2747 gcc_checking_assert (time
>= 0);
2748 /* nonspecialized_time should be always bigger than specialized time.
2749 Roundoff issues however may get into the way. */
2750 gcc_checking_assert ((nonspecialized_time
- time
) >= -1);
2752 /* Roundoff issues may make specialized time bigger than nonspecialized
2753 time. We do not really want that to happen because some heurstics
2754 may get confused by seeing negative speedups. */
2755 if (time
> nonspecialized_time
)
2756 time
= nonspecialized_time
;
2758 if (info
->loop_iterations
2759 && !info
->loop_iterations
->evaluate (possible_truths
))
2760 hints
|= INLINE_HINT_loop_iterations
;
2761 if (info
->loop_stride
2762 && !info
->loop_stride
->evaluate (possible_truths
))
2763 hints
|= INLINE_HINT_loop_stride
;
2764 if (info
->array_index
2765 && !info
->array_index
->evaluate (possible_truths
))
2766 hints
|= INLINE_HINT_array_index
;
2768 hints
|= INLINE_HINT_in_scc
;
2769 if (DECL_DECLARED_INLINE_P (node
->decl
))
2770 hints
|= INLINE_HINT_declared_inline
;
2772 size
= RDIV (size
, ipa_fn_summary::size_scale
);
2773 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
2775 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2776 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
2777 time
.to_double (), nonspecialized_time
.to_double ());
2780 if (ret_nonspecialized_time
)
2781 *ret_nonspecialized_time
= nonspecialized_time
;
2785 *ret_min_size
= min_size
;
2792 /* Estimate size and time needed to execute callee of EDGE assuming that
2793 parameters known to be constant at caller of EDGE are propagated.
2794 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2795 and types for parameters. */
2798 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
2799 vec
<tree
> known_vals
,
2800 vec
<ipa_polymorphic_call_context
>
2802 vec
<ipa_agg_jump_function_p
> known_aggs
,
2803 int *ret_size
, sreal
*ret_time
,
2804 sreal
*ret_nonspec_time
,
2807 clause_t clause
, nonspec_clause
;
2809 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
2810 &clause
, &nonspec_clause
);
2811 estimate_node_size_and_time (node
, clause
, nonspec_clause
,
2812 known_vals
, known_contexts
,
2813 known_aggs
, ret_size
, NULL
, ret_time
,
2814 ret_nonspec_time
, hints
, vNULL
);
2818 /* Update summary information of inline clones after inlining.
2819 Compute peak stack usage. */
2822 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
2824 struct cgraph_edge
*e
;
2825 struct ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (node
);
2826 struct ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (node
->callers
->caller
);
2829 callee_info
->stack_frame_offset
2830 = caller_info
->stack_frame_offset
2831 + caller_info
->estimated_self_stack_size
;
2832 peak
= callee_info
->stack_frame_offset
2833 + callee_info
->estimated_self_stack_size
;
2834 if (ipa_fn_summaries
->get (node
->global
.inlined_to
)->estimated_stack_size
< peak
)
2835 ipa_fn_summaries
->get (node
->global
.inlined_to
)->estimated_stack_size
= peak
;
2836 ipa_propagate_frequency (node
);
2837 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2839 if (!e
->inline_failed
)
2840 inline_update_callee_summaries (e
->callee
, depth
);
2841 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2843 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2844 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2847 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2848 When functoin A is inlined in B and A calls C with parameter that
2849 changes with probability PROB1 and C is known to be passthroug
2850 of argument if B that change with probability PROB2, the probability
2851 of change is now PROB1*PROB2. */
2854 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
2855 struct cgraph_edge
*edge
)
2857 if (ipa_node_params_sum
)
2860 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2861 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2862 struct ipa_call_summary
*inlined_es
2863 = ipa_call_summaries
->get (inlined_edge
);
2865 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
2867 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
2868 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2869 || jfunc
->type
== IPA_JF_ANCESTOR
)
2871 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
2872 ? ipa_get_jf_pass_through_formal_id (jfunc
)
2873 : ipa_get_jf_ancestor_formal_id (jfunc
);
2874 if (id
< (int) inlined_es
->param
.length ())
2876 int prob1
= es
->param
[i
].change_prob
;
2877 int prob2
= inlined_es
->param
[id
].change_prob
;
2878 int prob
= combine_probabilities (prob1
, prob2
);
2880 if (prob1
&& prob2
&& !prob
)
2883 es
->param
[i
].change_prob
= prob
;
2890 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2892 Remap predicates of callees of NODE. Rest of arguments match
2895 Also update change probabilities. */
2898 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
2899 struct cgraph_node
*node
,
2900 struct ipa_fn_summary
*info
,
2901 struct ipa_fn_summary
*callee_info
,
2902 vec
<int> operand_map
,
2903 vec
<int> offset_map
,
2904 clause_t possible_truths
,
2905 predicate
*toplev_predicate
)
2907 struct cgraph_edge
*e
, *next
;
2908 for (e
= node
->callees
; e
; e
= next
)
2910 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2912 next
= e
->next_callee
;
2914 if (e
->inline_failed
)
2916 remap_edge_change_prob (inlined_edge
, e
);
2920 p
= es
->predicate
->remap_after_inlining
2921 (info
, callee_info
, operand_map
,
2922 offset_map
, possible_truths
,
2924 edge_set_predicate (e
, &p
);
2927 edge_set_predicate (e
, toplev_predicate
);
2930 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
2931 operand_map
, offset_map
, possible_truths
,
2934 for (e
= node
->indirect_calls
; e
; e
= next
)
2936 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2938 next
= e
->next_callee
;
2940 remap_edge_change_prob (inlined_edge
, e
);
2943 p
= es
->predicate
->remap_after_inlining
2944 (info
, callee_info
, operand_map
, offset_map
,
2945 possible_truths
, *toplev_predicate
);
2946 edge_set_predicate (e
, &p
);
2949 edge_set_predicate (e
, toplev_predicate
);
2953 /* Same as remap_predicate, but set result into hint *HINT. */
2956 remap_hint_predicate (struct ipa_fn_summary
*info
,
2957 struct ipa_fn_summary
*callee_info
,
2959 vec
<int> operand_map
,
2960 vec
<int> offset_map
,
2961 clause_t possible_truths
,
2962 predicate
*toplev_predicate
)
2968 p
= (*hint
)->remap_after_inlining
2970 operand_map
, offset_map
,
2971 possible_truths
, *toplev_predicate
);
2972 if (p
!= false && p
!= true)
2975 set_hint_predicate (hint
, p
);
2981 /* We inlined EDGE. Update summary of the function we inlined into. */
2984 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
2986 struct ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
2987 struct cgraph_node
*to
= (edge
->caller
->global
.inlined_to
2988 ? edge
->caller
->global
.inlined_to
: edge
->caller
);
2989 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
2990 clause_t clause
= 0; /* not_inline is known to be false. */
2992 vec
<int> operand_map
= vNULL
;
2993 vec
<int> offset_map
= vNULL
;
2995 predicate toplev_predicate
;
2996 predicate true_p
= true;
2997 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3000 toplev_predicate
= *es
->predicate
;
3002 toplev_predicate
= true;
3004 info
->fp_expressions
|= callee_info
->fp_expressions
;
3006 if (callee_info
->conds
)
3007 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
3008 if (ipa_node_params_sum
&& callee_info
->conds
)
3010 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3011 int count
= ipa_get_cs_argument_count (args
);
3016 operand_map
.safe_grow_cleared (count
);
3017 offset_map
.safe_grow_cleared (count
);
3019 for (i
= 0; i
< count
; i
++)
3021 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3024 /* TODO: handle non-NOPs when merging. */
3025 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3027 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3028 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3029 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3032 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3034 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3035 if (offset
>= 0 && offset
< INT_MAX
)
3037 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3038 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3040 offset_map
[i
] = offset
;
3043 operand_map
[i
] = map
;
3044 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
3047 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3050 p
= e
->exec_predicate
.remap_after_inlining
3051 (info
, callee_info
, operand_map
,
3054 predicate nonconstp
;
3055 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3056 (info
, callee_info
, operand_map
,
3059 if (p
!= false && nonconstp
!= false)
3061 sreal add_time
= ((sreal
)e
->time
* edge
->frequency
) / CGRAPH_FREQ_BASE
;
3062 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3064 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3065 if (prob
!= REG_BR_PROB_BASE
3066 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3068 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3069 (double) prob
/ REG_BR_PROB_BASE
);
3071 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3074 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3075 offset_map
, clause
, &toplev_predicate
);
3076 remap_hint_predicate (info
, callee_info
,
3077 &callee_info
->loop_iterations
,
3078 operand_map
, offset_map
, clause
, &toplev_predicate
);
3079 remap_hint_predicate (info
, callee_info
,
3080 &callee_info
->loop_stride
,
3081 operand_map
, offset_map
, clause
, &toplev_predicate
);
3082 remap_hint_predicate (info
, callee_info
,
3083 &callee_info
->array_index
,
3084 operand_map
, offset_map
, clause
, &toplev_predicate
);
3086 inline_update_callee_summaries (edge
->callee
,
3087 ipa_call_summaries
->get (edge
)->loop_depth
);
3089 /* We do not maintain predicates of inlined edges, free it. */
3090 edge_set_predicate (edge
, &true_p
);
3091 /* Similarly remove param summaries. */
3092 es
->param
.release ();
3093 operand_map
.release ();
3094 offset_map
.release ();
3097 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3098 and time. Recompute it. */
3101 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3103 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
3109 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3111 info
->size
+= e
->size
;
3112 info
->time
+= e
->time
;
3114 estimate_calls_size_and_time (node
, &info
->size
, &info
->min_size
,
3116 ~(clause_t
) (1 << predicate::false_condition
),
3117 vNULL
, vNULL
, vNULL
);
3118 info
->size
= (info
->size
+ ipa_fn_summary::size_scale
/ 2) / ipa_fn_summary::size_scale
;
3122 /* This function performs intraprocedural analysis in NODE that is required to
3123 inline indirect calls. */
3126 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3128 ipa_analyze_node (node
);
3129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3131 ipa_print_node_params (dump_file
, node
);
3132 ipa_print_node_jump_functions (dump_file
, node
);
3137 /* Note function body size. */
3140 inline_analyze_function (struct cgraph_node
*node
)
3142 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3145 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3146 node
->name (), node
->order
);
3147 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3148 inline_indirect_intraprocedural_analysis (node
);
3149 compute_fn_summary (node
, false);
3152 struct cgraph_edge
*e
;
3153 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3154 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3155 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3156 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3163 /* Called when new function is inserted to callgraph late. */
3166 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3168 inline_analyze_function (node
);
3171 /* Note function body size. */
3174 ipa_fn_summary_generate (void)
3176 struct cgraph_node
*node
;
3178 FOR_EACH_DEFINED_FUNCTION (node
)
3179 if (DECL_STRUCT_FUNCTION (node
->decl
))
3180 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
3182 ipa_fn_summary_alloc ();
3184 ipa_fn_summaries
->enable_insertion_hook ();
3186 ipa_register_cgraph_hooks ();
3188 FOR_EACH_DEFINED_FUNCTION (node
)
3190 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3191 || opt_for_fn (node
->decl
, optimize
)))
3192 inline_analyze_function (node
);
3196 /* Write inline summary for edge E to OB. */
3199 read_ipa_call_summary (struct lto_input_block
*ib
, struct cgraph_edge
*e
)
3201 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3205 es
->call_stmt_size
= streamer_read_uhwi (ib
);
3206 es
->call_stmt_time
= streamer_read_uhwi (ib
);
3207 es
->loop_depth
= streamer_read_uhwi (ib
);
3209 bitpack_d bp
= streamer_read_bitpack (ib
);
3210 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3213 edge_set_predicate (e
, &p
);
3214 length
= streamer_read_uhwi (ib
);
3217 es
->param
.safe_grow_cleared (length
);
3218 for (i
= 0; i
< length
; i
++)
3219 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3224 /* Stream in inline summaries from the section. */
3227 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3230 const struct lto_function_header
*header
=
3231 (const struct lto_function_header
*) data
;
3232 const int cfg_offset
= sizeof (struct lto_function_header
);
3233 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3234 const int string_offset
= main_offset
+ header
->main_size
;
3235 struct data_in
*data_in
;
3236 unsigned int i
, count2
, j
;
3237 unsigned int f_count
;
3239 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3240 file_data
->mode_table
);
3243 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3244 header
->string_size
, vNULL
);
3245 f_count
= streamer_read_uhwi (&ib
);
3246 for (i
= 0; i
< f_count
; i
++)
3249 struct cgraph_node
*node
;
3250 struct ipa_fn_summary
*info
;
3251 lto_symtab_encoder_t encoder
;
3252 struct bitpack_d bp
;
3253 struct cgraph_edge
*e
;
3256 index
= streamer_read_uhwi (&ib
);
3257 encoder
= file_data
->symtab_node_encoder
;
3258 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3260 info
= ipa_fn_summaries
->get (node
);
3262 info
->estimated_stack_size
3263 = info
->estimated_self_stack_size
= streamer_read_uhwi (&ib
);
3264 info
->size
= info
->self_size
= streamer_read_uhwi (&ib
);
3265 info
->time
= sreal::stream_in (&ib
);
3267 bp
= streamer_read_bitpack (&ib
);
3268 info
->inlinable
= bp_unpack_value (&bp
, 1);
3269 info
->contains_cilk_spawn
= bp_unpack_value (&bp
, 1);
3270 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3272 count2
= streamer_read_uhwi (&ib
);
3273 gcc_assert (!info
->conds
);
3274 for (j
= 0; j
< count2
; j
++)
3277 c
.operand_num
= streamer_read_uhwi (&ib
);
3278 c
.size
= streamer_read_uhwi (&ib
);
3279 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3280 c
.val
= stream_read_tree (&ib
, data_in
);
3281 bp
= streamer_read_bitpack (&ib
);
3282 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3283 c
.by_ref
= bp_unpack_value (&bp
, 1);
3285 c
.offset
= streamer_read_uhwi (&ib
);
3286 vec_safe_push (info
->conds
, c
);
3288 count2
= streamer_read_uhwi (&ib
);
3289 gcc_assert (!info
->size_time_table
);
3290 for (j
= 0; j
< count2
; j
++)
3292 struct size_time_entry e
;
3294 e
.size
= streamer_read_uhwi (&ib
);
3295 e
.time
= sreal::stream_in (&ib
);
3296 e
.exec_predicate
.stream_in (&ib
);
3297 e
.nonconst_predicate
.stream_in (&ib
);
3299 vec_safe_push (info
->size_time_table
, e
);
3303 set_hint_predicate (&info
->loop_iterations
, p
);
3305 set_hint_predicate (&info
->loop_stride
, p
);
3307 set_hint_predicate (&info
->array_index
, p
);
3308 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3309 read_ipa_call_summary (&ib
, e
);
3310 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3311 read_ipa_call_summary (&ib
, e
);
3314 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3316 lto_data_in_delete (data_in
);
3320 /* Read inline summary. Jump functions are shared among ipa-cp
3321 and inliner, so when ipa-cp is active, we don't need to write them
3325 ipa_fn_summary_read (void)
3327 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3328 struct lto_file_decl_data
*file_data
;
3331 ipa_fn_summary_alloc ();
3333 while ((file_data
= file_data_vec
[j
++]))
3336 const char *data
= lto_get_section_data (file_data
,
3337 LTO_section_ipa_fn_summary
,
3340 inline_read_section (file_data
, data
, len
);
3342 /* Fatal error here. We do not want to support compiling ltrans units
3343 with different version of compiler or different flags than the WPA
3344 unit, so this should never happen. */
3345 fatal_error (input_location
,
3346 "ipa inline summary is missing in input file");
3348 ipa_register_cgraph_hooks ();
3350 ipa_prop_read_jump_functions ();
3352 gcc_assert (ipa_fn_summaries
);
3353 ipa_fn_summaries
->enable_insertion_hook ();
3357 /* Write inline summary for edge E to OB. */
3360 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3362 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3365 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3366 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3367 streamer_write_uhwi (ob
, es
->loop_depth
);
3369 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3370 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3371 streamer_write_bitpack (&bp
);
3374 es
->predicate
->stream_out (ob
);
3376 streamer_write_uhwi (ob
, 0);
3377 streamer_write_uhwi (ob
, es
->param
.length ());
3378 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3379 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3383 /* Write inline summary for node in SET.
3384 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3385 active, we don't need to write them twice. */
3388 ipa_fn_summary_write (void)
3390 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3391 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3392 unsigned int count
= 0;
3395 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3397 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3398 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3399 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3402 streamer_write_uhwi (ob
, count
);
3404 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3406 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3407 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3408 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3410 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3411 struct bitpack_d bp
;
3412 struct cgraph_edge
*edge
;
3415 struct condition
*c
;
3417 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3418 streamer_write_hwi (ob
, info
->estimated_self_stack_size
);
3419 streamer_write_hwi (ob
, info
->self_size
);
3420 info
->time
.stream_out (ob
);
3421 bp
= bitpack_create (ob
->main_stream
);
3422 bp_pack_value (&bp
, info
->inlinable
, 1);
3423 bp_pack_value (&bp
, info
->contains_cilk_spawn
, 1);
3424 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3425 streamer_write_bitpack (&bp
);
3426 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3427 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3429 streamer_write_uhwi (ob
, c
->operand_num
);
3430 streamer_write_uhwi (ob
, c
->size
);
3431 streamer_write_uhwi (ob
, c
->code
);
3432 stream_write_tree (ob
, c
->val
, true);
3433 bp
= bitpack_create (ob
->main_stream
);
3434 bp_pack_value (&bp
, c
->agg_contents
, 1);
3435 bp_pack_value (&bp
, c
->by_ref
, 1);
3436 streamer_write_bitpack (&bp
);
3437 if (c
->agg_contents
)
3438 streamer_write_uhwi (ob
, c
->offset
);
3440 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3441 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3443 streamer_write_uhwi (ob
, e
->size
);
3444 e
->time
.stream_out (ob
);
3445 e
->exec_predicate
.stream_out (ob
);
3446 e
->nonconst_predicate
.stream_out (ob
);
3448 if (info
->loop_iterations
)
3449 info
->loop_iterations
->stream_out (ob
);
3451 streamer_write_uhwi (ob
, 0);
3452 if (info
->loop_stride
)
3453 info
->loop_stride
->stream_out (ob
);
3455 streamer_write_uhwi (ob
, 0);
3456 if (info
->array_index
)
3457 info
->array_index
->stream_out (ob
);
3459 streamer_write_uhwi (ob
, 0);
3460 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3461 write_ipa_call_summary (ob
, edge
);
3462 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3463 write_ipa_call_summary (ob
, edge
);
3466 streamer_write_char_stream (ob
->main_stream
, 0);
3467 produce_asm (ob
, NULL
);
3468 destroy_output_block (ob
);
3471 ipa_prop_write_jump_functions ();
3475 /* Release inline summary. */
3478 ipa_free_fn_summary (void)
3480 struct cgraph_node
*node
;
3481 if (!ipa_call_summaries
)
3483 FOR_EACH_DEFINED_FUNCTION (node
)
3485 ipa_fn_summaries
->get (node
)->reset (node
);
3486 ipa_fn_summaries
->release ();
3487 ipa_fn_summaries
= NULL
;
3488 ipa_call_summaries
->release ();
3489 delete ipa_call_summaries
;
3490 ipa_call_summaries
= NULL
;
3491 edge_predicate_pool
.release ();
3496 const pass_data pass_data_local_fn_summary
=
3498 GIMPLE_PASS
, /* type */
3499 "local-fnsummary", /* name */
3500 OPTGROUP_INLINE
, /* optinfo_flags */
3501 TV_INLINE_PARAMETERS
, /* tv_id */
3502 0, /* properties_required */
3503 0, /* properties_provided */
3504 0, /* properties_destroyed */
3505 0, /* todo_flags_start */
3506 0, /* todo_flags_finish */
3509 class pass_local_fn_summary
: public gimple_opt_pass
3512 pass_local_fn_summary (gcc::context
*ctxt
)
3513 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3516 /* opt_pass methods: */
3517 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3518 virtual unsigned int execute (function
*)
3520 return compute_fn_summary_for_current ();
3523 }; // class pass_local_fn_summary
3528 make_pass_local_fn_summary (gcc::context
*ctxt
)
3530 return new pass_local_fn_summary (ctxt
);
3534 /* Free inline summary. */
3538 const pass_data pass_data_ipa_free_fn_summary
=
3540 SIMPLE_IPA_PASS
, /* type */
3541 "free-fnsummary", /* name */
3542 OPTGROUP_NONE
, /* optinfo_flags */
3543 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3544 0, /* properties_required */
3545 0, /* properties_provided */
3546 0, /* properties_destroyed */
3547 0, /* todo_flags_start */
3548 /* Early optimizations may make function unreachable. We can not
3549 remove unreachable functions as part of the ealry opts pass because
3550 TODOs are run before subpasses. Do it here. */
3551 ( TODO_remove_functions
| TODO_dump_symtab
), /* todo_flags_finish */
3554 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3557 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3558 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
)
3561 /* opt_pass methods: */
3562 virtual unsigned int execute (function
*)
3564 ipa_free_fn_summary ();
3568 }; // class pass_ipa_free_fn_summary
3572 simple_ipa_opt_pass
*
3573 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3575 return new pass_ipa_free_fn_summary (ctxt
);
3580 const pass_data pass_data_ipa_fn_summary
=
3582 IPA_PASS
, /* type */
3583 "fnsummary", /* name */
3584 OPTGROUP_INLINE
, /* optinfo_flags */
3585 TV_IPA_FNSUMMARY
, /* tv_id */
3586 0, /* properties_required */
3587 0, /* properties_provided */
3588 0, /* properties_destroyed */
3589 0, /* todo_flags_start */
3590 ( TODO_dump_symtab
), /* todo_flags_finish */
3593 class pass_ipa_fn_summary
: public ipa_opt_pass_d
3596 pass_ipa_fn_summary (gcc::context
*ctxt
)
3597 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
3598 ipa_fn_summary_generate
, /* generate_summary */
3599 ipa_fn_summary_write
, /* write_summary */
3600 ipa_fn_summary_read
, /* read_summary */
3601 NULL
, /* write_optimization_summary */
3602 NULL
, /* read_optimization_summary */
3603 NULL
, /* stmt_fixup */
3604 0, /* function_transform_todo_flags_start */
3605 NULL
, /* function_transform */
3606 NULL
) /* variable_transform */
3609 /* opt_pass methods: */
3610 virtual unsigned int execute (function
*) { return 0; }
3612 }; // class pass_ipa_fn_summary
3617 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
3619 return new pass_ipa_fn_summary (ctxt
);