/* Interprocedural constant propagation
   Copyright (C) 2005-2021 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code-size
      cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.
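
   As a minimal illustration of both cases (our own sketch, not an example
   from the papers above), consider:

     static int mul (int a, int b) { return a * b; }
     int f (int x) { return mul (x, 4); }
     int g (int x) { return mul (x, 4); }

   Every call site passes the constant 4 as B, so IPA-CP can modify mul
   itself to rely on B being 4 (case 1 above); if only some call sites
   passed the constant, it could instead create a specialized clone such
   as mul.constprop.0 for those contexts and redirect their calls to it
   (case 2 above).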

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
		  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.
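
   For instance (an illustrative sketch; unknown_fn stands for any call
   whose result the analysis cannot track), in

     extern int use (int a, int b, int c);
     int caller (int i) { return use (i + 1, 7, unknown_fn ()); }

   the call to use gets a pass-through jump function for A (formal
   parameter I together with the operation + 1), a constant jump function
   for B (the value 7) and an unknown jump function for C.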

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm from the one in the original Callahan et al.
   paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.
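
   For example (illustration only), if every caller of a function passes 3
   for one of its parameters, the first kind of decision lets the function
   itself assume that value; if one group of callers passes 3 and another
   passes 5, the second kind may create one specialized clone per context,
   provided the local and propagated estimates suggest the speedup
   outweighs the growth in size.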

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0, low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic context of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, "               ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

/* Print bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, "         ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, "        virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, "        AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, "        AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that
   self-recursive calls from node ITSELF are summed separately.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
      }
  return false;
}

/* Return true if this NODE is viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}
919 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
921 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
922 if (info
->node_enqueued
)
924 info
->node_enqueued
= 1;
925 topo
->stack
[topo
->stack_top
++] = node
;

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the range in the OTHER
   lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with value range described by VR
   lattice.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet.  We xor the values; wherever
   m_value and VALUE differ at the same bit position, that bit is dropped to
   varying.  Return true if the mask changed.
   This function assumes that the lattice value is in CONSTANT state.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
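
/* As a worked example of the above (ours, not from the original sources):
   meeting a lattice holding value 0x12 with mask 0 with an incoming pair
   value 0x16, mask 0 yields m_mask = (0 | 0) | (0x12 ^ 0x16) = 0x04, so
   only bit 2 becomes unknown while all remaining bits stay known.  */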

/* Meet the bits lattice with operand
   described by <value, mask, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }
  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }
  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
	info->node_calling_single_call = true;
	return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, " Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   " The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
	  || !ipa_get_type (info, i)
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.m_vr = value_range ();
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init ();
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			    DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
	return input;
      else
	return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
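
/* For illustration (ours, not from the original sources): a call such as
   foo (x + 4) is described by a pass-through jump function with opcode
   PLUS_EXPR and operand 4; if propagation determines that X is 3 at the
   call, the function above folds 3 + 4 in the parameter type and yields
   the interprocedural invariant 7.  */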

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
				  input,
				  ipa_get_jf_pass_through_operand (jfunc),
				  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  else
    return NULL_TREE;
}
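
/* For illustration (ours, not from the original sources): if INPUT is &s
   and the ancestor jump function records an offset of 32 bits, the result
   built above is the equivalent of &MEM_REF <s, 4>, i.e. the address of
   the base object displaced by four bytes.  */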

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (!info->lattices
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (!info->lattices
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
				   value_range *src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}

/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
			    ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
				       jfunc->m_vr,
				       NOP_EXPR, parm_type,
				       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
	= ipcp_get_transformation_summary (cs->caller->inlined_to
					   ? cs->caller->inlined_to
					   : cs->caller);
      if (!sum || !sum->m_vr)
	return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
	return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
			 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
			 (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
	  value_range res;

	  if (ipa_vr_operation_and_type_effects (&res,
						 &srcvr,
						 operation, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
      else
	{
	  value_range op_res, res;
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  value_range op_vr (op, op);

	  range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
	  if (ipa_vr_operation_and_type_effects (&res,
						 &op_res,
						 NOP_EXPR, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
    }
  return vr;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
		     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
	  && aggval->index == index)
	return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}

/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
			 struct cgraph_node *node,
			 struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
	value = info->known_csts[src_idx];
      else
	value = get_clone_agg_value (node, item->value.load_agg.offset,
				     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
	= ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
	{
	  struct ipcp_lattice<tree> *lat = &src_plats->itself;

	  if (!lat->is_single_const ())
	    return NULL_TREE;

	  value = lat->values->value;
	}
      else if (src_plats->aggs
	       && !src_plats->aggs_bottom
	       && !src_plats->aggs_contain_variable
	       && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
	{
	  struct ipcp_agg_lattice *aglat;

	  for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
	    {
	      if (aglat->offset > item->value.load_agg.offset)
		break;

	      if (aglat->offset == item->value.load_agg.offset)
		{
		  if (aglat->is_single_const ())
		    value = aglat->values->value;
		  break;
		}
	    }
	}
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
	return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
				  value,
				  item->value.pass_through.operand,
				  item->type);
}

/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
			      struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
	{
	  struct ipa_agg_value value_item;

	  value_item.offset = item->offset;
	  value_item.value = value;

	  agg.items.safe_push (value_item);
	}
    }
  return agg;
}

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

static void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
	  || !opt_for_fn (node->decl, optimize))
	continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}

/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself or the offset within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}

/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   CST and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = cst;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}

/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
			      unsigned same_lat_gen_level)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = ctx;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}

/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records address of existing or newly added
   ipcp_value.

   If the value is generated for a self-recursive call as a result of an
   arithmetic pass-through jump-function acting on a value in the same lattice,
   SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
   zero.  If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
				  int src_idx, HOST_WIDE_INT offset,
				  ipcp_value<valtype> **val_p,
				  unsigned same_lat_gen_level)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
	if (val_p)
	  *val_p = val;

	if (val->self_recursion_generated_level < same_lat_gen_level)
	  val->self_recursion_generated_level = same_lat_gen_level;

	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source<valtype> *s;
	    for (s = val->sources; s; s = s->next)
	      if (s->cs == cs && s->val == src_val)
		break;
	    if (s)
	      return false;
	  }

	val->add_source (cs, src_val, src_idx, offset);
	return false;
      }

  if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
						param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
	{
	  while (val->sources)
	    {
	      ipcp_value_source<valtype> *src = val->sources;
	      val->sources = src->next;
	      ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
	    }
	}
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to end of value list, which can reduce iterations
     of propagation stage for recursive function.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}

/* A helper function that returns result of operation specified by OPCODE on
   the value of SRC_VAL.  If non-NULL, OPND1_TYPE is expected type for the
   value of SRC_VAL.  If the operation is binary, OPND2 is a constant value
   acting as its second operand.  If non-NULL, RES_TYPE is expected type of
   the result.  */

static tree
get_val_across_arith_op (enum tree_code opcode,
			 tree opnd1_type,
			 tree opnd2,
			 ipcp_value<tree> *src_val,
			 tree res_type)
{
  tree opnd1 = src_val->value;

  /* Skip source values that are incompatible with specified type.  */
  if (opnd1_type
      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
    return NULL_TREE;

  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is expected type for the values in SRC_LAT.
   OPND2 is a constant value if transformation is a binary operation.
   SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
   a part of the aggregate.  SRC_IDX is the index of the source parameter.
   RES_TYPE is the value type of result being propagated into.  Return true if
   DEST_LAT changed.  */

static bool
propagate_vals_across_arith_jfunc (cgraph_edge *cs,
				   enum tree_code opcode,
				   tree opnd1_type,
				   tree opnd2,
				   ipcp_lattice<tree> *src_lat,
				   ipcp_lattice<tree> *dest_lat,
				   HOST_WIDE_INT src_offset,
				   int src_idx,
				   tree res_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* Due to circular dependencies, propagating within an SCC through arithmetic
     transformation would create infinite number of values.  But for
     self-feeding recursive function, we could allow propagation in a limited
     count, and this can enable a simple kind of recursive function versioning.
     For other scenario, we would just make lattices bottom.  */
  if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
    {
      int i;

      int max_recursive_depth = opt_for_fn (cs->caller->decl,
					    param_ipa_cp_max_recursive_depth);
      if (src_lat != dest_lat || max_recursive_depth < 1)
	return dest_lat->set_contains_variable ();

      /* No benefit if recursive execution is in low probability.  */
      if (cs->sreal_frequency () * 100
	  <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
				       param_ipa_cp_min_recursive_probability))
	return dest_lat->set_contains_variable ();

      auto_vec<ipcp_value<tree> *, 8> val_seeds;

      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  /* Now we do not use self-recursively generated value as propagation
	     source, this is absolutely conservative, but could avoid explosion
	     of lattice's value space, especially when one recursive function
	     calls another recursive.  */
	  if (src_val->self_recursion_generated_p ())
	    {
	      ipcp_value_source<tree> *s;

	      /* If the lattice has already been propagated for the call site,
		 no need to do that again.  */
	      for (s = src_val->sources; s; s = s->next)
		if (s->cs == cs)
		  return dest_lat->set_contains_variable ();
	    }
	  else
	    val_seeds.safe_push (src_val);
	}

      gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);

      /* Recursively generate lattice values with a limited count.  */
      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
	{
	  for (int j = 1; j < max_recursive_depth; j++)
	    {
	      tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
						     src_val, res_type);
	      if (!cstval
		  || !ipacp_value_safe_for_type (res_type, cstval))
		break;

	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
					  src_offset, &src_val, j);
	      gcc_checking_assert (src_val);
	    }
	}
      ret |= dest_lat->set_contains_variable ();
    }
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
	/* Now we do not use self-recursively generated value as propagation
	   source, otherwise it is easy to make value space of normal lattice
	   overflow.  */
	if (src_val->self_recursion_generated_p ())
	  {
	    ret |= dest_lat->set_contains_variable ();
	    continue;
	  }

	tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
					       src_val, res_type);
	if (cstval
	    && ipacp_value_safe_for_type (res_type, cstval))
	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
				      src_offset);
	else
	  ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}
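
/* For illustration, a hypothetical example (not taken from GCC sources or
   its testsuite) of the self-feeding propagation handled above:

     int
     fact (int n)
     {
       if (n <= 1)
         return 1;
       return n * fact (n - 1);
     }

   When fact is only ever called as fact (8), the MINUS_EXPR pass-through
   jump function of the self-recursive call generates 8, 7, 6, ... as
   successive lattice values, at most param_ipa_cp_max_recursive_depth of
   them, each remembering the generation level at which it was created,
   which is what later allows creating a chain of specialized clones.  */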
/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
				    ipcp_lattice<tree> *src_lat,
				    ipcp_lattice<tree> *dest_lat, int src_idx,
				    tree parm_type)
{
  return propagate_vals_across_arith_jfunc (cs,
				ipa_get_jf_pass_through_operation (jfunc),
				NULL_TREE,
				ipa_get_jf_pass_through_operand (jfunc),
				src_lat, dest_lat, -1, src_idx, parm_type);
}
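
/* A minimal sketch of what a pass-through jump function with an operation
   describes (hypothetical example, not from GCC sources):

     static int g (int b);

     static int
     f (int a)
     {
       return g (a + 4);
     }

   The argument of the call to g is a pass-through of parameter A with
   operation PLUS_EXPR and operand 4, so a constant 3 in the lattice of A
   propagates as 7 into the lattice of parameter B of g.  */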
/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  */

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx,
				tree param_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t && ipacp_value_safe_for_type (param_type, t))
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
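
/* A sketch of the ancestor case (hypothetical example):

     struct A { int i; };
     struct B { char c; struct A a; };

     static int use (struct A *a);

     static int
     f (struct B *b)
     {
       return use (&b->a);
     }

   The argument of the call to use is an ancestor jump function that adds
   the offset of field a within struct B to the value of parameter B, which
   is what ipa_get_jf_ancestor_result applies to each constant in SRC_LAT
   above.  */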
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (ipacp_value_safe_for_type (param_type, val))
	return dest_lat->add_value (val, cs, NULL, 0);
      else
	return dest_lat->set_contains_variable ();
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx, param_type);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use it for indirect inlining), we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}
/* Propagate polymorphic contexts across jump function JFUNC that is associated
   with edge CS and describes argument IDX and put the values into DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
			  ipa_jump_func *jfunc, int idx,
			  ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
{
  if (dest_lat->bottom)
    return false;
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}
    }

 prop_fail:
  if (!added_sth)
    {
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate bits across jfunc that is associated with
   edge cs and update dest_lattice accordingly.  */

bool
propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
				     ipa_jump_func *jfunc,
				     ipcp_bits_lattice *dest_lattice)
{
  if (dest_lattice->bottom_p ())
    return false;

  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
  tree parm_type = ipa_get_type (callee_info, idx);

  /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
     transform for these cases.  Similarly, we can have bad type mismatches
     with LTO, avoid doing anything with those too.  */
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);

      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 for eg consider:
	 int f(int x)
	 {
	   g (x & 0xff);
	 }
	 Assume lattice for x is bottom, however we can still propagate
	 result of x & 0xff == 0xff, which gets computed during ccp1 pass
	 and we store it in jump function during analysis stage.  */

      if (src_lats->bits_lattice.bottom_p ()
	  && jfunc->bits)
	return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
					precision);
      else
	return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
					code, operand);
    }

  else if (jfunc->type == IPA_JF_ANCESTOR)
    return dest_lattice->set_to_bottom ();
  else if (jfunc->bits)
    return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
				    precision);
  else
    return dest_lattice->set_to_bottom ();
}
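
/* A sketch of the value/mask encoding met above (illustrative only, following
   the CCP convention): a mask bit of 1 means the corresponding bit is
   unknown, a mask bit of 0 means it is known and given by value.  So for the
   g (x & 0xff) example in the comment above, the jump function can record
   value 0 with mask 0xff: the low eight bits are unknown but every higher
   bit is a known zero, regardless of what the lattice of x says.  */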
/* Propagate value range across jump function JFUNC that is associated with
   edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
   accordingly.  */

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ())
	return dest_lat->set_to_bottom ();

      value_range vr;
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	ipa_vr_operation_and_type_effects (&vr,
					   &src_lats->m_value_range.m_vr,
					   operation, param_type,
					   operand_type);
      /* A crude way to prevent unbounded number of value range updates
	 in SCC components.  We should allow limited number of updates within
	 SCC, too.  */
      else if (!ipa_edge_within_scc (cs))
	{
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  value_range op_vr (op, op);
	  value_range op_res,res;

	  range_fold_binary_expr (&op_res, operation, operand_type,
				  &src_lats->m_value_range.m_vr, &op_vr);
	  ipa_vr_operation_and_type_effects (&vr,
					     &op_res,
					     NOP_EXPR, param_type,
					     operand_type);
	}
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  if (jfunc->m_vr)
	    {
	      value_range jvr;
	      if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
						     NOP_EXPR,
						     param_type,
						     jfunc->m_vr->type ()))
		vr.intersect (jvr);
	    }
	  return dest_lat->meet_with (&vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  value_range tmpvr (val, val);
	  return dest_lat->meet_with (&tmpvr);
	}
    }

  value_range vr;
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (&vr);
  else
    return dest_lat->set_to_bottom ();
}
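
/* A sketch of the pass-through case above (hypothetical example):

     static void g (int b);

     static void
     f (int a)
     {
       if (a > 0 && a < 10)
         g (a + 1);
     }

   With a known range [1, 9] for parameter A, the PLUS_EXPR pass-through with
   operand 1 is folded by range_fold_binary_expr into [2, 10] for parameter B,
   subject to the type-effect conversions applied above.  */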
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}
/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.  If
   there are overlaps turn whole DEST_PLATS to bottom and return false.  If any
   skipped lattices were newly marked as containing variable, set *CHANGE to
   true.  MAX_AGG_ITEMS is the maximum number of lattices.  */

static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
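
/* To illustrate the invariant the walk above maintains (sketch only): the
   list of aggregate lattices hanging off DEST_PLATS is kept sorted by offset
   and free of overlaps.  For a passed struct { int a; int b; } on a target
   with 32-bit int, the list may hold one lattice at offset 0 of size 32 and
   another at offset 32 of size 32; asking for offset 0 with a different
   size, or for an overlapping range, drops the whole PLATS to bottom.  */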
/* Set all AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}
/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   DELTA_OFFSET.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
/* Determine whether there is anything to propagate FROM SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
    && (!src_plats->aggs_by_ref
	|| ipa_get_jf_pass_through_agg_preserved (jfunc));
}
/* Propagate values through ITEM, jump function for a part of an aggregate,
   into corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */

static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = ipa_node_params_sum->get (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	  return ret;
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
      return ret;
    }

  if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk;
}
/* Propagate constants from the caller to the callee of CS.  INFO describes the
   caller.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = ipa_node_params_sum->get (callee);
  if (!callee_info)
    return false;

  args = ipa_edge_args_sum->get (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination.  The latter
   three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				const vec<tree> &known_csts,
				const vec<ipa_polymorphic_call_context>
				&known_contexts,
				const vec<ipa_agg_value_set> &known_aggs,
				struct ipa_agg_replacement_value *agg_reps,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  if (param_index == -1)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      tree t = NULL;

      if (ie->indirect_info->agg_contents)
	{
	  if (agg_reps && ie->indirect_info->guaranteed_unmodified)
	    {
	      while (agg_reps)
		{
		  if (agg_reps->index == param_index
		      && agg_reps->offset == ie->indirect_info->offset
		      && agg_reps->by_ref == ie->indirect_info->by_ref)
		    {
		      t = agg_reps->value;
		      break;
		    }
		  agg_reps = agg_reps->next;
		}
	    }
	  if (!t)
	    {
	      const ipa_agg_value_set *agg;
	      if (known_aggs.length () > (unsigned int) param_index)
		agg = &known_aggs[param_index];
	      else
		agg = NULL;
	      bool from_global_constant;
	      t = ipa_find_agg_cst_for_param (agg,
					      (unsigned) param_index
						< known_csts.length ()
					      ? known_csts[param_index]
					      : NULL,
					      ie->indirect_info->offset,
					      ie->indirect_info->by_ref,
					      &from_global_constant);
	      if (t
		  && !from_global_constant
		  && !ie->indirect_info->guaranteed_unmodified)
		t = NULL_TREE;
	    }
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  /* Try to work out value of virtual table pointer value in replacements.  */
  if (!t && agg_reps && !ie->indirect_info->by_ref)
    {
      while (agg_reps)
	{
	  if (agg_reps->index == param_index
	      && agg_reps->offset == ie->indirect_info->offset
	      && agg_reps->by_ref)
	    {
	      t = agg_reps->value;
	      break;
	    }
	  agg_reps = agg_reps->next;
	}
    }

  /* Try to work out value of virtual table pointer value in known
     aggregate values.  */
  if (!t && known_aggs.length () > (unsigned int) param_index
      && !ie->indirect_info->by_ref)
    {
      const ipa_agg_value_set *agg = &known_aggs[param_index];
      t = ipa_find_agg_cst_for_param (agg,
				      (unsigned) param_index
					< known_csts.length ()
				      ? known_csts[param_index] : NULL,
				      ie->indirect_info->offset, true);
    }

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						      vtable, offset, &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean determining
   whether the discovered target is only a speculative guess.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}

/* The same functionality as above overloaded for ipa_auto_call_arg_values.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_auto_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}
/* Calculate devirtualization time bonus for NODE, assuming we know information
   about arguments stored in AVALS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      target = ipa_get_indirect_edge_target (ie, avals, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}
/* Return time bonus incurred because of hints stored in ESTIMATES.  */

static int
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
  int result = 0;
  ipa_hints hints = estimates.hints;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  if (hints & INLINE_HINT_loop_iterations)
    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();

  if (hints & INLINE_HINT_loop_stride)
    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();

  return result;
}
/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline sreal
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       sreal evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
      / 100;

  return evaluation;
}
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST of the specialization and either the sum of counts of incoming
   edges to the potential new clone in COUNT_SUM or the sum of their
   frequencies in FREQ_SUM.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
			    sreal freq_sum, profile_count count_sum,
			    int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (count_sum > profile_count::zero ())
    {
      gcc_assert (base_count > profile_count::zero ());
      sreal factor = count_sum.probability_in (base_count).to_sreal ();
      sreal evaluation = (time_benefit * factor) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		   "size: %i, count_sum: ", time_benefit.to_double (),
		   size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
		   info->node_within_scc
		     ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation.to_double (), eval_threshold);
	}

      return evaluation.to_int () >= eval_threshold;
    }
  else
    {
      sreal evaluation = (time_benefit * freq_sum) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
		 "threshold: %i\n",
		 time_benefit.to_double (), size_cost, freq_sum.to_double (),
		 info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation.to_double (), eval_threshold);

      return evaluation.to_int () >= eval_threshold;
    }
}
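
/* The two branches above, written as formulas (a sketch, using the scaling
   as reconstructed here):

     with profile counts:
       evaluation = time_benefit * (count_sum / base_count) / size_cost * 1000

     without:
       evaluation = time_benefit * freq_sum / size_cost * 1000

   both reduced by incorporate_penalties and accepted when the result is at
   least param_ipa_cp_eval_threshold.  E.g. a time benefit of 8, frequency
   sum of 2 and size cost of 16 evaluates to 8 * 2 / 16 * 1000 == 1000.  */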
/* Return all context independent values from aggregate lattices in PLATS in a
   vector.  Return NULL if there are none.  */

static vec<ipa_agg_value>
context_independent_aggregate_values (class ipcp_param_lattices *plats)
{
  vec<ipa_agg_value> res = vNULL;

  if (plats->aggs_bottom
      || plats->aggs_contain_variable
      || plats->aggs_count == 0)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs;
       aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value item;
	item.offset = aglat->offset;
	item.value = aglat->values->value;
	res.safe_push (item);
      }
  return res;
}
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
				   ipa_auto_call_arg_values *avals,
				   bool calculate_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);
  if (calculate_aggs)
    avals->m_known_aggs.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals->m_known_vals[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account known context as reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
	{
	  vec<ipa_agg_value> agg_items;
	  struct ipa_agg_value_set *agg;

	  agg_items = context_independent_aggregate_values (plats);
	  agg = &avals->m_known_aggs[i];
	  agg->items = agg_items;
	  agg->by_ref = plats->aggs_by_ref;
	  ret |= !agg_items.is_empty ();
	}
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
			       ipa_auto_call_arg_values *avals,
			       int removable_params_cost, int est_move_cost,
			       ipcp_value_base *val)
{
  sreal time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = (estimates.nonspecialized_time - estimates.time)
      + (devirtualization_time_bonus (node, avals)
	 + hint_time_bonus (node, estimates)
	 + removable_params_cost + est_move_cost);

  int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
/* Get the overall limit of unit growth based on parameters extracted from
   NODE.  It does not really make sense to mix functions with different
   overall growth limits, but it is possible, and if it happens, we do not
   want to select one limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}
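
/* For example, assuming the documented defaults
   param_ipa_cp_large_unit_insns == 16000 and param_ipa_cp_unit_growth == 10:
   a unit whose original overall size is 20000 gets a limit of
   20000 + 20000 * 10 / 100 + 1 == 22001, whereas a small unit of size 4000
   is first bumped to the large-unit floor of 16000 and may thus grow to
   16000 + 16000 * 10 / 100 + 1 == 17601.  */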
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  if (always_const || devirt_bonus
      || (removable_params_cost && node->can_change_signature))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, "     Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
					   stats.count_sum, size))
	{
	  if (size + overall_size <= get_max_overall_size (node))
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, "     Decided to specialize for all "
			 "known contexts, growth (to %li) deemed "
			 "beneficial.\n", overall_size);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Not cloning for all contexts because "
		     "maximum unit size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");
    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_vals[i] = NULL_TREE;
    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      ipa_agg_value_set *agg = &avals.m_known_aggs[i];
      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is in known_aggs.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  for (val = aglat->values; val; val = val->next)
	    {
	      struct ipa_agg_value item;

	      item.offset = aglat->offset;
	      item.value = val->value;
	      agg->items.safe_push (item);

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, i);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %g, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit.to_double (),
			   val->local_size_cost);
		}

	      agg->items.pop ();
	    }
	}
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;
	  v->scc_no = cur_val->dfs;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
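
/* The above is Tarjan's strongly connected components algorithm run on the
   graph of values connected by their sources: dfs numbers values in
   visitation order, low_link tracks the smallest dfs reachable through
   sources, and when dfs == low_link after all sources are processed, the
   values still on the stack down to CUR_VAL form one SCC, which is linked
   through scc_next and prepended to values_topo, yielding a reverse
   topological order of SCCs.  */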
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    if (opt_for_fn (v->decl, flag_ipa_cp)
		&& opt_for_fn (v->decl, optimize))
	      push_node_to_stack (topo, v);
	    /* When V is not optimized, we cannot push it to the stack, but
	       still we need to set all its callees lattices to bottom.  */
	    else
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		propagate_constants_across_call (cs);
	  }

      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;
	  class ipa_node_params *info = NULL;
	  bool self_scc = true;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		cgraph_node *callee = cs->callee->function_symbol ();

		if (v != callee)
		  self_scc = false;

		if (!info)
		  {
		    info = ipa_node_params_sum->get (v);
		    info->node_within_scc = true;
		  }

		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, callee);
	      }

	  if (info)
	    info->node_is_self_scc = self_scc;

	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ()
	    && opt_for_fn (v->decl, flag_ipa_cp)
	    && opt_for_fn (v->decl, optimize))
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}
/* Propagate the estimated effects of individual values along the topological
   order from the dependent values to those they depend on.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      for (val = base; val; val = val->scc_next)
	{
	  time = time + val->local_time_benefit + val->prop_time_benefit;
	  size = size + val->local_size_cost + val->prop_size_cost;
	}

      for (val = base; val; val = val->scc_next)
	{
	  processed_srcvals.empty ();
	  for (src = val->sources; src; src = src->next)
	    if (src->val
		&& src->cs->maybe_hot_p ())
	      {
		if (!processed_srcvals.add (src->val))
		  {
		    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
		    if (prop_size < INT_MAX)
		      src->val->prop_size_cost = prop_size;
		    else
		      continue;
		  }

		int special_factor = 1;
		if (val->same_scc (src->val))
		  special_factor
		    = opt_for_fn (src->cs->caller->decl,
				  param_ipa_cp_recursive_freq_factor);
		else if (val->self_recursion_generated_p ()
			 && (src->cs->callee->function_symbol ()
			     == src->cs->caller))
		  {
		    int max_recur_gen_depth
		      = opt_for_fn (src->cs->caller->decl,
				    param_ipa_cp_max_recursive_depth);
		    special_factor = max_recur_gen_depth
		      - val->self_recursion_generated_level + 1;
		  }

		src->val->prop_time_benefit
		  += time * special_factor * src->cs->sreal_frequency ();
	      }

	  if (size < INT_MAX)
	    {
	      val->prop_time_benefit = time;
	      val->prop_size_cost = size;
	    }
	  else
	    {
	      val->prop_time_benefit = 0;
	      val->prop_size_cost = 0;
	    }
	}
    }
}
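
/* A sketch with illustrative numbers: if value B in a callee exists only
   because value A flows into it over an edge with sreal frequency 2, the
   summed benefit of B's whole SCC (local plus already-propagated) is added
   to A's prop_time_benefit multiplied by 2 (and possibly by the special
   factors above), so the profitability check for cloning for A also credits
   the cloning of the callee for B that it would enable.  */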
/* Callback for qsort to sort counts of all edges.  */

static int
compare_edge_profile_counts (const void *a, const void *b)
{
  const profile_count *cnt1 = (const profile_count *) a;
  const profile_count *cnt2 = (const profile_count *) b;

  if (*cnt1 < *cnt2)
    return 1;
  if (*cnt1 > *cnt2)
    return -1;
  return 0;
}
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  base_count = profile_count::uninitialized ();

  bool compute_count_base = false;
  unsigned base_count_pos_percent = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    if (node->has_gimple_body_p ()
	&& opt_for_fn (node->decl, flag_ipa_cp)
	&& opt_for_fn (node->decl, optimize))
      {
	ipa_node_params *info = ipa_node_params_sum->get (node);
	determine_versionability (node, info);

	unsigned nlattices = ipa_get_param_count (info);
	void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
	info->lattices = new (chunk) ipcp_param_lattices[nlattices];
	initialize_node_lattices (node);
      }
    ipa_size_summary *s = ipa_size_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    if (node->count.ipa ().initialized_p ())
      {
	compute_count_base = true;
	unsigned pos_percent = opt_for_fn (node->decl,
					   param_ipa_cp_profile_count_base);
	base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
      }
  }

  if (compute_count_base)
    {
      auto_vec<profile_count> all_edge_counts;
      all_edge_counts.reserve_exact (symtab->edges_count);
      FOR_EACH_DEFINED_FUNCTION (node)
	for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	  {
	    profile_count count = cs->count.ipa ();
	    if (!(count > profile_count::zero ()))
	      continue;

	    enum availability avail;
	    cgraph_node *tgt
	      = cs->callee->function_or_virtual_thunk_symbol (&avail);
	    ipa_node_params *info = ipa_node_params_sum->get (tgt);
	    if (info && info->versionable)
	      all_edge_counts.quick_push (count);
	  }

      if (!all_edge_counts.is_empty ())
	{
	  gcc_assert (base_count_pos_percent <= 100);
	  all_edge_counts.qsort (compare_edge_profile_counts);

	  unsigned base_count_pos
	    = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
	  base_count = all_edge_counts[base_count_pos];

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nSelected base_count from %u edges at "
		       "position %u, arriving at: ", all_edge_counts.length (),
		       base_count_pos);
	      base_count.dump (dump_file);
	      fprintf (dump_file, "\n");
	    }
	}
      else if (dump_file)
	fprintf (dump_file, "\nNo candidates with non-zero call count found, "
		 "continuing as if without profile feedback.\n");
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
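
/* Illustration of the base_count selection above (hypothetical numbers):
   with param_ipa_cp_profile_count_base == 10 and 200 edges with non-zero IPA
   counts leading to versionable functions, the counts are sorted in
   descending order and base_count becomes the element at position
   200 * 10 / 100 == 20, roughly the 90th percentile; count sums of edges
   bringing a value are later divided by it in good_cloning_opportunity_p.  */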
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				struct ipa_agg_replacement_value *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      next_ie = ie->next_callee;
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       vNULL, aggvals, &speculative);
      if (target)
	{
	  bool agg_contents = ie->indirect_info->agg_contents;
	  bool polymorphic = ie->indirect_info->polymorphic;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  if (cs && !agg_contents && !polymorphic)
	    {
	      ipa_node_params *info = ipa_node_params_sum->get (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE
		  && !ipa_get_param_load_dereferenced (info, param_index))
		{
		  struct ipa_ref *to_del;

		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "     controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "       and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
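/* A minimal sketch (invented code, not from this file) of the situation the
   function above handles: when a clone is known to receive a particular
   function as its CB argument, the indirect call through CB can be made
   direct:

     static void hook (void) { }
     static void dispatch (void (*cb) (void)) { cb (); }  // indirect edge
     void entry (void) { dispatch (hook); }               // CB == hook known

   In a "dispatch" clone specialized for CB == hook, the call via CB becomes
   a direct call to HOOK, and the controlled-uses count of CB can drop to
   the point where the address-taken reference to HOOK is removable.  */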
class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  */

class edge_clone_summary
{
public:
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    m_initialize_when_cloning = true;
  }

  virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
			  edge_clone_summary *src_data,
			  edge_clone_summary *dst_data);
};

/* Edge duplication hook.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
				 edge_clone_summary *src_data,
				 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}
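/* For illustration (hypothetical edges, not from this file): if the clone
   list of an edge E1 is E1 -> E3 and E1 is duplicated into E2, the hook
   above splices E2 in right after its origin, yielding E1 -> E2 -> E3, with
   the prev_clone links mirroring the next_clone links.  This keeps all
   clones of one original call statement reachable from any one of them via
   get_next_cgraph_edge_clone below.  */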
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */

static bool
calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
					     bool allow_recursion_to_clone)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);

  if (availability <= AVAIL_INTERPOSABLE)
    return false;
  if (callee == dest)
    return true;
  if (!allow_recursion_to_clone && cs->caller == callee)
    return false;

  ipa_node_params *info = ipa_node_params_sum->get (callee);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      tree t;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else
	t = get_clone_agg_value (cs->caller, src->offset, src->index);
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
	return true;

      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}
/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
			    ipcp_value_source<ipa_polymorphic_call_context>
			    *src,
			    cgraph_node *dest,
			    ipcp_value<ipa_polymorphic_call_context> *)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
				  caller_info->known_contexts[src->index]);

  class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
							    src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
				plats->ctxlat.values->value);
}
/* Get the next clone in the linked list of clones of an edge.  */

static inline struct cgraph_edge *
get_next_cgraph_edge_clone (struct cgraph_edge *cs)
{
  edge_clone_summary *s = edge_clone_summaries->get (cs);
  return s != NULL ? s->next_clone : NULL;
}
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequency and their number and cumulative profile
   counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
   *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */

template <typename valtype>
static bool
get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
				sreal *freq_sum, int *caller_count,
				profile_count *rec_count_sum,
				profile_count *nonrec_count_sum)
{
  ipcp_value_source<valtype> *src;
  sreal freq = 0;
  int count = 0;
  profile_count rec_cnt = profile_count::zero ();
  profile_count nonrec_cnt = profile_count::zero ();
  bool hot = false;
  bool non_self_recursive = false;

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    {
	      count++;
	      freq += cs->sreal_frequency ();
	      hot |= cs->maybe_hot_p ();
	      if (cs->caller != dest)
		{
		  non_self_recursive = true;
		  if (cs->count.ipa ().initialized_p ())
		    nonrec_cnt += cs->count.ipa ();
		}
	      else if (cs->count.ipa ().initialized_p ())
		rec_cnt += cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  /* If the only edges bringing a value are self-recursive ones, do not bother
     evaluating it.  */
  if (!non_self_recursive)
    return false;

  *freq_sum = freq;
  *caller_count = count;
  *rec_count_sum = rec_cnt;
  *nonrec_count_sum = nonrec_cnt;

  if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
    {
      struct cgraph_edge *cs;

      /* Cold non-SCC source edge could trigger hot recursive execution of
	 function.  Consider the case as hot and rely on following cost model
	 computation to further select right one.  */
      for (cs = dest->callers; cs; cs = cs->next_caller)
	if (cs->caller == dest && cs->maybe_hot_p ())
	  return true;
    }

  return hot;
}
/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
   to let a non-self-recursive caller be the first element.  Thus, we can
   simplify intersecting operations on values that arrive from all of these
   callers, especially when there exists self-recursive call.  Return true if
   this kind of adjustment is possible.  */

static bool
adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
				       cgraph_node *node)
{
  for (unsigned i = 0; i < callers.length (); i++)
    {
      cgraph_edge *cs = callers[i];

      if (cs->caller != node)
	{
	  if (i > 0)
	    {
	      callers[i] = callers[0];
	      callers[0] = cs;
	    }
	  return true;
	}
    }
  return false;
}
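/* Example (made-up caller set): for callers {rec1, rec2, other} of NODE
   where rec1 and rec2 are self-recursive edges, the function above swaps
   "other" into slot 0, so that value intersection can start from a caller
   that actually supplies values from outside the recursion instead of from
   an edge whose values depend on the result being computed.  */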
/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
   is assumed their number is known and equal to CALLER_COUNT.  */

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
			int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    ret.quick_push (cs);
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (caller_count > 1)
    adjust_callers_for_value_intersection (ret, dest);

  return ret;
}
/* Construct a replacement map for a known VALUE for a formal parameter
   PARM_NUM.  Return it or NULL if for some reason it cannot be created.
   FORCE_LOAD_REF should be set to true when the reference created for the
   constant should be a load one and not an address one because the
   corresponding parameter p is only used as *p.  */

static struct ipa_replace_map *
get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
		     bool force_load_ref)
{
  struct ipa_replace_map *replace_map;

  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, "    replacing ");
      ipa_dump_param (dump_file, info, parm_num);

      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);

      if (force_load_ref)
	fprintf (dump_file, " - forcing load reference\n");
      else
	fprintf (dump_file, "\n");
    }
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  replace_map->force_load_ref = force_load_ref;
  return replace_map;
}
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
   one, otherwise it will be referred to as the original node.  */

static void
dump_profile_updates (cgraph_node *node, bool spec)
{
  if (spec)
    fprintf (dump_file, "     setting count of the specialized node %s to ",
	     node->dump_name ());
  else
    fprintf (dump_file, "     setting count of the original node %s to ",
	     node->dump_name ());

  node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "       edge to %s has count ",
	       cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}
/* With partial train run we do not want to assume that original's count is
   zero whenever we redirect all executed edges to clone.  Simply drop profile
   to local one in this case.  In any case, return the new value.  ORIG_NODE
   is the original node and its count has not been updated yet.  */

static profile_count
lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
{
  if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
    remainder = remainder.guessed_local ();

  return remainder;
}
/* Structure to sum counts coming from nodes other than the original node and
   its clones.  */

struct gather_other_count_struct
{
  cgraph_node *orig;
  profile_count other_count;
};

/* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
   counts that come from non-self-recursive calls.  */

static bool
gather_count_of_non_rec_edges (cgraph_node *node, void *data)
{
  gather_other_count_struct *desc = (gather_other_count_struct *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
      desc->other_count += cs->count.ipa ();
  return false;
}
/* Structure to help analyze if we need to boost counts of some clones of some
   non-recursive edges to match the new callee count.  */

struct desc_incoming_count_struct
{
  cgraph_node *orig;
  hash_set <cgraph_edge *> *processed_edges;
  profile_count count;
  unsigned unproc_orig_rec_edges;
};

/* Go over edges calling NODE and its thunks and gather information about
   incoming counts so that we know if we need to make any adjustments.  */

static void
analyze_clone_icoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	analyze_clone_icoming_counts (cs->caller, desc);
	continue;
      }
    else
      {
	if (cs->count.initialized_p ())
	  desc->count += cs->count.ipa ();
	if (!desc->processed_edges->contains (cs)
	    && cs->caller->clone_of == desc->orig)
	  desc->unproc_orig_rec_edges++;
      }
}
/* If caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because it is coming from the "seed" clone
   for the first value and so has been excessively scaled back as if it was not
   a recursive call, adjust it so that the incoming counts of NODE match its
   count.  NODE is the node or its thunk.  */

static void
adjust_clone_incoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	adjust_clone_incoming_counts (cs->caller, desc);
	profile_count sum = profile_count::zero ();
	for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
	  if (e->count.initialized_p ())
	    sum += e->count.ipa ();
	cs->count = cs->count.combine_with_ipa_count (sum);
      }
    else if (!desc->processed_edges->contains (cs)
	     && cs->caller->clone_of == desc->orig)
      {
	cs->count += desc->count;
	if (dump_file)
	  {
	    fprintf (dump_file, "       Adjusted count of an incoming edge of "
		     "a clone %s -> %s to ", cs->caller->dump_name (),
		     cs->callee->dump_name ());
	    cs->count.dump (dump_file);
	    fprintf (dump_file, "\n");
	  }
      }
}
/* When ORIG_NODE has been cloned for values which have been generated for a
   self-recursive call as a result of an arithmetic pass-through
   jump-functions, adjust its count together with counts of all such clones in
   SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.

   The function sums the counts of the original node and all its clones that
   cannot be attributed to a specific clone because it comes from a
   non-recursive edge.  This sum is then evenly divided between the clones and
   on top of that each one gets all the counts which can be attributed directly
   to it.  */

static void
update_counts_for_self_gen_clones (cgraph_node *orig_node,
				   const vec<cgraph_node *> &self_gen_clones)
{
  profile_count redist_sum = orig_node->count.ipa ();
  if (!(redist_sum > profile_count::zero ()))
    return;

  if (dump_file)
    fprintf (dump_file, "     Updating profile of self recursive clone "
	     "series\n");

  gather_other_count_struct gocs;
  gocs.orig = orig_node;
  gocs.other_count = profile_count::zero ();

  auto_vec <profile_count, 8> other_edges_count;
  for (cgraph_node *n : self_gen_clones)
    {
      gocs.other_count = profile_count::zero ();
      n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
					     &gocs, false);
      other_edges_count.safe_push (gocs.other_count);
      redist_sum -= gocs.other_count;
    }

  hash_set<cgraph_edge *> processed_edges;
  unsigned i = 0;
  for (cgraph_node *n : self_gen_clones)
    {
      profile_count orig_count = n->count;
      profile_count new_count
	= (redist_sum.apply_scale (1, self_gen_clones.length ())
	   + other_edges_count[i]);
      new_count = lenient_count_portion_handling (new_count, orig_node);
      n->count = new_count;
      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
      for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
	{
	  cs->count = cs->count.apply_scale (new_count, orig_count);
	  processed_edges.add (cs);
	}
      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
	cs->count = cs->count.apply_scale (new_count, orig_count);

      i++;
    }

  /* There are still going to be edges to ORIG_NODE that have one or more
     clones coming from another node clone in SELF_GEN_CLONES and which we
     scaled by the same amount, which means that the total incoming sum of
     counts to ORIG_NODE will be too high, scale such edges back.  */
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      if (cs->callee->ultimate_alias_target () == orig_node)
	{
	  unsigned den = 0;
	  for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	    if (e->callee->ultimate_alias_target () == orig_node
		&& processed_edges.contains (e))
	      den++;
	  if (den > 0)
	    for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	      if (e->callee->ultimate_alias_target () == orig_node
		  && processed_edges.contains (e))
		e->count = e->count.apply_scale (1, den);
	}
    }

  /* Edges from the seeds of the values generated for arithmetic jump-functions
     along self-recursive edges are likely to have fairly low count and so
     edges from them to nodes in the self_gen_clones do not correspond to the
     artificially distributed count of the nodes, the total sum of incoming
     edges to some clones might be too low.  Detect this situation and correct
     it.  */
  for (cgraph_node *n : self_gen_clones)
    {
      if (!(n->count.ipa () > profile_count::zero ()))
	continue;

      desc_incoming_count_struct desc;
      desc.orig = orig_node;
      desc.processed_edges = &processed_edges;
      desc.count = profile_count::zero ();
      desc.unproc_orig_rec_edges = 0;
      analyze_clone_icoming_counts (n, &desc);

      if (n->count.differs_from_p (desc.count))
	{
	  if (n->count > desc.count
	      && desc.unproc_orig_rec_edges > 0)
	    {
	      desc.count = n->count - desc.count;
	      desc.count
		= desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
	      adjust_clone_incoming_counts (n, &desc);
	    }
	  else if (dump_file)
	    fprintf (dump_file,
		     "       Unable to fix up incoming counts for %s.\n",
		     n->dump_name ());
	}
    }

  if (dump_file)
    for (cgraph_node *n : self_gen_clones)
      dump_profile_updates (n, n != orig_node);
}
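/* Worked example (numbers invented for illustration): assume ORIG_NODE has
   an IPA count of 1000 and SELF_GEN_CLONES contains ORIG_NODE plus three
   clones, with non-recursive edges contributing 100 to one clone and
   nothing to the others.  Then redist_sum becomes 1000 - 100 = 900, every
   node in the series receives 900 / 4 = 225, and the clone with outside
   callers ends up with 225 + 100 = 325; the total over the series is again
   1000 before the edge-level fixups that follow the division.  */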
/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  This function should not be used
   for clones generated for arithmetic pass-through jump functions on a
   self-recursive call graph edge, that situation is handled by
   update_counts_for_self_gen_clones.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct caller_statistics stats;
  profile_count new_sum;
  profile_count remainder, orig_node_count = orig_node->count.ipa ();

  if (!(orig_node_count > profile_count::zero ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "     Updating profile from original count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  init_caller_stats (&stats, new_node);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
  new_sum = stats.count_sum;

  if (new_sum > orig_node_count)
    {
      /* TODO: Perhaps this should be gcc_unreachable ()?  */
      remainder = profile_count::zero ().guessed_local ();
    }
  else if (stats.rec_count_sum.nonzero_p ())
    {
      int new_nonrec_calls = stats.n_nonrec_calls;
      /* There are self-recursive edges which are likely to bring in the
	 majority of calls but which we must divide in between the original and
	 new node.  */
      init_caller_stats (&stats, orig_node);
      orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
						     &stats, false);
      int orig_nonrec_calls = stats.n_nonrec_calls;
      profile_count orig_nonrec_call_count = stats.count_sum;

      if (orig_node->local)
	{
	  if (!orig_nonrec_call_count.nonzero_p ())
	    {
	      if (dump_file)
		fprintf (dump_file, "       The original is local and the only "
			 "incoming edges from non-dead callers with nonzero "
			 "counts are self-recursive, assuming it is cold.\n");
	      /* The NEW_NODE count and counts of all its outgoing edges
		 are still unmodified copies of ORIG_NODE's.  Just clear
		 the latter and bail out.  */
	      profile_count zero;
	      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
		zero = profile_count::zero ().guessed_local ();
	      else
		zero = profile_count::adjusted_zero ();
	      orig_node->count = zero;
	      for (cgraph_edge *cs = orig_node->callees;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;
	      for (cgraph_edge *cs = orig_node->indirect_calls;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;

	      return;
	    }
	}
      else
	{
	  /* Let's behave as if there was another caller that accounts for all
	     the calls that were either indirect or from other compilation
	     units.  */
	  orig_nonrec_calls++;
	  profile_count pretend_caller_count
	    = (orig_node_count - new_sum - orig_nonrec_call_count
	       - stats.rec_count_sum);
	  orig_nonrec_call_count += pretend_caller_count;
	}

      /* Divide all "unexplained" counts roughly proportionally to sums of
	 counts of non-recursive calls.

	 We put rather arbitrary limits on how many counts we claim because the
	 number of non-self-recursive incoming count is only a rough guideline
	 and there are cases (such as mcf) where using it blindly just takes
	 too many.  And if lattices are considered in the opposite order we
	 could also take too few.  */
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
      profile_count new_part
	= MAX(MIN (unexp.apply_scale (new_sum,
				      new_sum + orig_nonrec_call_count),
		   unexp.apply_scale (limit_den - 1, limit_den)),
	      unexp.apply_scale (new_nonrec_calls, limit_den));

      if (dump_file)
	{
	  fprintf (dump_file, "       Claiming ");
	  new_part.dump (dump_file);
	  fprintf (dump_file, " of unexplained ");
	  unexp.dump (dump_file);
	  fprintf (dump_file, " counts because of self-recursive "
		   "calls\n");
	}
      new_sum += new_part;
      remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						  orig_node);
    }
  else
    remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						orig_node);

  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  new_node->count = new_sum;
  orig_node->count = remainder;

  profile_count orig_new_node_count = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);

  profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);
  for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
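/* Worked example (invented counts, no self-recursion involved): if
   ORIG_NODE has an IPA count of 1000 and the callers already redirected to
   NEW_NODE sum to 600, then new_sum is 600, the remainder kept by ORIG_NODE
   is 400 (possibly dropped to a guessed-local zero under
   -fprofile-partial-training), and the outgoing edge counts of the two
   nodes are scaled roughly by 600/1000 and 400/1000 respectively.  */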
/* Update the respective profile of specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
			    struct cgraph_node *orig_node,
			    profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count;

  if (dump_file)
    {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  gcc_assert (orig_node_count >= redirected_sum);

  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count -= redirected_sum;

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
						 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
static void adjust_references_in_caller (cgraph_edge *cs,
					 symtab_node *symbol, int index);

/* Simple structure to pass a symbol and index (with same meaning as parameters
   of adjust_references_in_caller) through a void* parameter of a
   call_for_symbol_thunks_and_aliases callback.  */
struct symbol_and_index_together
{
  symtab_node *symbol;
  int index;
};

/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
   adjust_references_in_caller on edges up in the call-graph, if necessary.  */
static bool
adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
{
  symbol_and_index_together *pack = (symbol_and_index_together *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      adjust_references_in_caller (cs, pack->symbol, pack->index);
  return false;
}

/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
   variable which is only dereferenced and which is represented by SYMBOL.  See
   if we can remove ADDR reference in callers associated with the call.  */

static void
adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
  if (jfunc->type == IPA_JF_CONST)
    {
      ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
						    cs->lto_stmt_uid);
      if (!to_del)
	return;
      to_del->remove_reference ();
      if (dump_file)
	fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		 cs->caller->dump_name (), symbol->dump_name ());
      return;
    }

  if (jfunc->type != IPA_JF_PASS_THROUGH
      || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
    return;

  int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
  cgraph_node *caller = cs->caller;
  ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
  /* TODO: This consistency check may be too big and not really
     that useful.  Consider removing it.  */
  tree cst;
  if (caller_info->ipcp_orig_node)
    cst = caller_info->known_csts[fidx];
  else
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
      gcc_assert (lat->is_single_const ());
      cst = lat->values->value;
    }
  gcc_assert (TREE_CODE (cst) == ADDR_EXPR
	      && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
		  == symbol));

  int cuses = ipa_get_controlled_uses (caller_info, fidx);
  if (cuses == IPA_UNDESCRIBED_USE)
    return;
  gcc_assert (cuses > 0);
  cuses--;
  ipa_set_controlled_uses (caller_info, fidx, cuses);
  if (cuses)
    return;

  if (caller_info->ipcp_orig_node)
    {
      /* Cloning machinery has created a reference here, we need to either
	 remove it or change it to a read one.  */
      ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
      if (to_del && to_del->use == IPA_REF_ADDR)
	{
	  to_del->remove_reference ();
	  if (dump_file)
	    fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		     cs->caller->dump_name (), symbol->dump_name ());
	  if (ipa_get_param_load_dereferenced (caller_info, fidx))
	    {
	      caller->create_reference (symbol, IPA_REF_LOAD, NULL);
	      if (dump_file)
		fprintf (dump_file,
			 "      ...and replaced it with LOAD one.\n");
	    }
	}
    }

  symbol_and_index_together pack;
  pack.symbol = symbol;
  pack.index = fidx;
  if (caller->can_change_signature)
    caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
						&pack, true);
}
/* Return true if we would like to remove a parameter from NODE when cloning it
   with KNOWN_CSTS scalar constants.  */

static bool
want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
{
  auto_vec<bool, 16> surviving;
  bool filled_vec = false;
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      if (!known_csts[i] && ipa_is_param_used (info, i))
	continue;

      if (!filled_vec)
	{
	  clone_info *info = clone_info::get (node);
	  if (!info || !info->param_adjustments)
	    return true;
	  info->param_adjustments->get_surviving_params (&surviving);
	  filled_vec = true;
	}
      if (surviving.length() < (unsigned) i && surviving[i])
	return true;
    }
  return false;
}
/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 struct ipa_agg_replacement_value *aggvals,
			 vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct ipa_agg_replacement_value *av;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
					   ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
	continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
	{
	  tree base = get_base_address (TREE_OPERAND (t, 0));
	  if (TREE_CODE (base) == VAR_DECL
	      && ipa_get_controlled_uses (info, i) == 0
	      && ipa_get_param_load_dereferenced (info, i)
	      && (ref_symbol = symtab_node::get (base)))
	    {
	      load_ref = true;
	      if (node->can_change_signature)
		for (cgraph_edge *caller : callers)
		  adjust_references_in_caller (caller, ref_symbol, i);
	    }
	}

      ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
      if (replace_map)
	vec_safe_push (replace_trees, replace_map);
    }
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (av = aggvals; av; av = av->next)
    new_node->maybe_create_reference (av->value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, "     known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	ipa_dump_agg_replacement_values (dump_file, aggvals);
    }
  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);

  return new_node;
}
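/* A small sketch of the transformation performed above (function names and
   constants invented for illustration): if every edge in CALLERS passes 3
   as the second argument of

     static int add (int a, int b) { return a + b; }

   the clone is materialized roughly as a function equivalent to

     static int add_constprop (int a) { return a + 3; }

   named with a ".constprop" suffix, with B dropped from the signature when
   want_remove_some_param_p agrees, and with all CALLERS redirected to it.  */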
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
			       bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}

/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check if JFUNC is a simple no-operation pass-through.  */

static bool
self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
				   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
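/* Illustrative sketch (invented code, not from this file) of the kind of
   self-feeding recursion the two predicates above recognize, here with an
   arithmetic pass-through that generates a chain of values; a variant for
   the aggregate form appears in a comment further below:

     int fn (int i)
     {
       if (i >= 8)
	 return 0;
       return 1 + fn (i + 1);   // self-recursive edge, pass-through (i + 1)
     }
     int entry (void) { return fn (0); }

   Seeded with 0 from ENTRY, propagation along the recursive edge generates
   the values 1, 2, ... (bounded by --param ipa-cp-max-recursive-depth), and
   clones created for them form the SELF_GEN_CLONES series whose counts are
   redistributed by update_counts_for_self_gen_clones above.  */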
/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> &known_csts,
					    const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

		fn (int i)
		{
		  fn (i & 1);
		}

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      gcc_assert (newval);
	      t = ipa_get_jf_arith_result (
			ipa_get_jf_pass_through_operation (jump_func),
			newval,
			ipa_get_jf_pass_through_operand (jump_func),
			type);
	    }
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}
/* Go through PLATS and create a vector of values consisting of values and
   offsets (minus OFFSET) of lattices that contain only a single value.  */

static vec<ipa_agg_value>
copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
{
  vec<ipa_agg_value> res = vNULL;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value ti;
	ti.offset = aglat->offset - offset;
	ti.value = aglat->values->value;
	res.safe_push (ti);
      }
  return res;
}
/* Intersect all values in INTER with single value lattices in PLATS (while
   subtracting OFFSET).  */

static void
intersect_with_plats (class ipcp_param_lattices *plats,
		      vec<ipa_agg_value> *inter,
		      HOST_WIDE_INT offset)
{
  struct ipcp_agg_lattice *aglat;
  struct ipa_agg_value *item;
  int k;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    {
      inter->release ();
      return;
    }

  aglat = plats->aggs;
  FOR_EACH_VEC_ELT (*inter, k, item)
    {
      bool found = false;
      if (!item->value)
	continue;
      while (aglat)
	{
	  if (aglat->offset - offset > item->offset)
	    break;
	  if (aglat->offset - offset == item->offset)
	    {
	      if (aglat->is_single_const ())
		{
		  tree value = aglat->values->value;

		  if (values_equal_for_ipcp_p (item->value, value))
		    found = true;
		}
	      break;
	    }
	  aglat = aglat->next;
	}
      if (!found)
	item->value = NULL_TREE;
    }
}
/* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
   vector result while subtracting OFFSET from the individual value offsets.  */

static vec<ipa_agg_value>
agg_replacements_to_vector (struct cgraph_node *node, int index,
			    HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *av;
  vec<ipa_agg_value> res = vNULL;

  for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
    if (av->index == index
	&& (av->offset - offset) >= 0)
      {
	struct ipa_agg_value item;
	gcc_checking_assert (av->value);
	item.offset = av->offset - offset;
	item.value = av->value;
	res.safe_push (item);
      }

  return res;
}
/* Intersect all values in INTER with those that we have already scheduled to
   be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
   (while subtracting OFFSET).  */

static void
intersect_with_agg_replacements (struct cgraph_node *node, int index,
				 vec<ipa_agg_value> *inter,
				 HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *srcvals;
  struct ipa_agg_value *item;
  int i;

  srcvals = ipa_get_agg_replacements_for_node (node);
  if (!srcvals)
    {
      inter->release ();
      return;
    }

  FOR_EACH_VEC_ELT (*inter, i, item)
    {
      struct ipa_agg_replacement_value *av;
      bool found = false;
      if (!item->value)
	continue;
      for (av = srcvals; av; av = av->next)
	{
	  gcc_checking_assert (av->value);
	  if (av->index == index
	      && av->offset - offset == item->offset)
	    {
	      if (values_equal_for_ipcp_p (item->value, av->value))
		found = true;
	      break;
	    }
	}
      if (!found)
	item->value = NULL_TREE;
    }
}
/* Intersect values in INTER with aggregate values that come along edge CS to
   parameter number INDEX and return it.  If INTER does not actually exist yet,
   copy all incoming values to it.  If we determine we ended up with no values
   whatsoever, return a released vector.  */

static vec<ipa_agg_value>
intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
				vec<ipa_agg_value> inter)
{
  struct ipa_jump_func *jfunc;
  jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  class ipcp_param_lattices *orig_plats;
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
	  if (agg_pass_through_permissible_p (orig_plats, jfunc))
	    {
	      if (!inter.exists ())
		inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
	      else
		intersect_with_agg_replacements (cs->caller, src_idx,
						 &inter, 0);
	      return inter;
	    }
	}
      else
	{
	  class ipcp_param_lattices *src_plats;
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  if (agg_pass_through_permissible_p (src_plats, jfunc))
	    {
	      /* Currently we do not produce clobber aggregate jump
		 functions, adjust when we do.  */
	      gcc_checking_assert (!jfunc->agg.items);
	      if (!inter.exists ())
		inter = copy_plats_to_inter (src_plats, 0);
	      else
		intersect_with_plats (src_plats, &inter, 0);
	      return inter;
	    }
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;
      HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  if (!inter.exists ())
	    inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
	  else
	    intersect_with_agg_replacements (cs->caller, src_idx, &inter,
					     delta);
	}
      else
	{
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  /* Currently we do not produce clobber aggregate jump
	     functions, adjust when we do.  */
	  gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
	  if (!inter.exists ())
	    inter = copy_plats_to_inter (src_plats, delta);
	  else
	    intersect_with_plats (src_plats, &inter, delta);
	}
      return inter;
    }

  if (jfunc->agg.items)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      struct ipa_agg_value *item;
      int k;

      if (!inter.exists ())
	for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
	  {
	    struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
	    tree value = ipa_agg_value_from_node (caller_info, cs->caller,
						  agg_item);
	    if (value)
	      {
		struct ipa_agg_value agg_value;

		agg_value.value = value;
		agg_value.offset = agg_item->offset;
		inter.safe_push (agg_value);
	      }
	  }
      else
	FOR_EACH_VEC_ELT (inter, k, item)
	  {
	    int l = 0;
	    bool found = false;

	    if (!item->value)
	      continue;

	    while ((unsigned) l < jfunc->agg.items->length ())
	      {
		struct ipa_agg_jf_item *ti;
		ti = &(*jfunc->agg.items)[l];
		if (ti->offset > item->offset)
		  break;
		if (ti->offset == item->offset)
		  {
		    tree value;

		    /* Besides simple pass-through aggregate jump function,
		       arithmetic aggregate jump function could also bring
		       same aggregate value as parameter passed-in for
		       self-feeding recursive call.  For example,

			 fn (int *i)
			 {
			   int j = *i & 1;
			   fn (&j);
			 }

		       Given that *i is 0, recursive propagation via (*i & 1)
		       also gets 0.  */
		    if (self_recursive_agg_pass_through_p (cs, ti, index,
							   false))
		      value = ipa_get_jf_arith_result (
					ti->value.pass_through.operation,
					item->value,
					ti->value.pass_through.operand,
					ti->type);
		    else
		      value = ipa_agg_value_from_node (caller_info,
						       cs->caller, ti);

		    if (value && values_equal_for_ipcp_p (item->value, value))
		      found = true;
		    break;
		  }
		l++;
	      }
	    if (!found)
	      item->value = NULL;
	  }
    }
  else
    {
      inter.release ();
      return vNULL;
    }
  return inter;
}
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  */

static struct ipa_agg_replacement_value *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  struct ipa_agg_replacement_value *res;
  struct ipa_agg_replacement_value **tail = &res;
  struct cgraph_edge *cs;
  int i, j, count = ipa_get_param_count (dest_info);

  FOR_EACH_VEC_ELT (callers, j, cs)
    {
      ipa_edge_args *args = ipa_edge_args_sum->get (cs);
      if (!args)
	{
	  count = 0;
	  break;
	}
      int c = ipa_get_cs_argument_count (args);
      if (c < count)
	count = c;
    }

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      vec<ipa_agg_value> inter = vNULL;
      struct ipa_agg_value *item;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
      int j;

      /* Among other things, the following check should deal with all by_ref
	 mismatches.  */
      if (plats->aggs_bottom)
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jfunc
	    = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
	  if (self_recursive_pass_through_p (cs, jfunc, i)
	      && (!plats->aggs_by_ref
		  || ipa_get_jf_pass_through_agg_preserved (jfunc)))
	    continue;
	  inter = intersect_aggregates_with_edge (cs, i, inter);

	  if (!inter.exists ())
	    goto next_param;
	}

      FOR_EACH_VEC_ELT (inter, j, item)
	{
	  struct ipa_agg_replacement_value *v;

	  if (!item->value)
	    continue;

	  v = ggc_alloc<ipa_agg_replacement_value> ();
	  v->index = i;
	  v->offset = item->offset;
	  v->value = item->value;
	  v->by_ref = plats->aggs_by_ref;
	  *tail = v;
	  tail = &v->next;
	}

    next_param:
      if (inter.exists ())
	inter.release ();
    }
  *tail = NULL;
  return res;
}
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
	continue;

      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}
/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  struct ipa_agg_replacement_value *aggval;
  int i, ec, count;

  aggval = ipa_get_agg_replacements_for_node (node);
  if (!aggval)
    return true;

  ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
  count = ipa_get_param_count (clone_node_info);
  ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
  if (ec < count)
    for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
      if (aggval->index >= ec)
	return false;

  ipa_node_params *orig_node_info
    = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats;
      bool interesting = false;
      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (aggval->index == i)
	  {
	    interesting = true;
	    break;
	  }
      if (!interesting)
	continue;

      plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
      if (plats->aggs_bottom)
	return false;

      vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
      if (!values.exists ())
	return false;

      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (aggval->index == i)
	  {
	    struct ipa_agg_value *item;
	    int j;
	    bool found = false;
	    FOR_EACH_VEC_ELT (values, j, item)
	      if (item->value
		  && item->offset == av->offset
		  && values_equal_for_ipcp_p (item->value, av->value))
		{
		  found = true;
		  break;
		}
	    if (!found)
	      {
		values.release ();
		return false;
	      }
	  }
      values.release ();
    }
  return true;
}
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected to the special node.
   This function can therefore redirect the final edge in a SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}

/* Return a copy of KNOWN_CONTEXTS if it is not empty, otherwise return
   vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}
/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

static bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  while (aggvals)
    {
      if (aggvals->index == index
	  && aggvals->offset == offset
	  && values_equal_for_ipcp_p (aggvals->value, value))
	return true;
      aggvals = aggvals->next;
    }
  return false;
}

/* Return true if offset is minus one because source of a polymorphic context
   cannot be an aggregate value.  */

static bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
			       int , HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}
6012 /* Decide whether to create a special version of NODE for value VAL of
6013 parameter at the given INDEX. If OFFSET is -1, the value is for the
6014 parameter itself, otherwise it is stored at the given OFFSET of the
6015 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6016 is a vector which contains clones created for self-recursive calls with an
6017 arithmetic pass-through jump function. */
template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  struct ipa_agg_replacement_value *aggvals;
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were it would be only a guesswork which we
	 can do now.  The recursive part of the counts can be derived from the
	 count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa ().apply_scale (1, dem);
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum,
				      val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  aggvals = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}
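
/* For illustration only (hypothetical source, not from GCC itself): if some
   but not all call sites pass the same constant, e.g.

     static int mul (int x, int factor) { return x * factor; }
     int a (int i) { return mul (i, 4); }
     int b (int i, int f) { return mul (i, f); }

   then, for the value 4 of parameter FACTOR known on a's edge only, the
   function above weighs val->local_time_benefit against
   val->local_size_cost and, provided good_cloning_opportunity_p agrees,
   creates a clone semantically equal to

     static int mul_constprop (int x) { return x * 4; }

   and redirects a's edge to it while b keeps calling the original.  */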
/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been considered
		   for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val, &avals,
					   &self_gen_clones);
	}

      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      ipa_agg_replacement_value *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
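
/* A sketch of the two cases handled above (hypothetical source, not from
   GCC itself).  With

     static int f (int x, int y) { return x / y; }
     int g (void) { return f (10, 2); }
     int h (int n) { return f (n, 2); }

   the value 2 of Y is context independent, so the per-value loops skip it;
   instead do_clone_for_all_contexts produces a single clone of f with y
   replaced by 2 that serves both g and h.  The value 10 of X, known only
   on g's edge, goes through decide_about_value.  */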
/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = ipa_node_params_sum->get (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
	ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
	/* Unoptimized callers are like dead ones.  */
	if (!caller_info || !caller_info->node_dead)
	  return true;
      }
  return false;
}
/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
	ipa_node_params *info = ipa_node_params_sum->get (v);
	if (info
	    && !v->call_for_symbol_thunks_and_aliases
		 (has_undead_caller_from_outside_scc_p, NULL, true))
	  info->node_dead = 1;
      }

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
	spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
      if (ipa_node_params_sum->get (v)
	  && ipa_node_params_sum->get (v)->node_dead)
	fprintf (dump_file, "  Marking node as dead: %s.\n",
		 v->dump_name ());
}
/* The decision stage.  Iterate over the topological order of call graph nodes
   TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}
/* Look up all the bits information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_bits_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool dumped_sth = false;
      bool found_useful_result = false;

      if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp is disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}

      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->bits, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_bits *jfbits;

	  if (plats->bits_lattice.constant_p ())
	    {
	      jfbits
		= ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
					      plats->bits_lattice.get_mask ());
	      if (!dbg_cnt (ipa_cp_bits))
		jfbits = NULL;
	    }
	  else
	    jfbits = NULL;

	  ts->bits->quick_push (jfbits);
	  if (!dump_file || !jfbits)
	    continue;
	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  print_hex (jfbits->value, dump_file);
	  fprintf (dump_file, ", mask = ");
	  print_hex (jfbits->mask, dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}
/* Look up all VR information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool found_useful_result = false;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagation; -fipa-vrp is disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_vr vr;

	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ()
	      && dbg_cnt (ipa_cp_vr))
	    {
	      vr.known = true;
	      vr.type = plats->m_value_range.m_vr.kind ();
	      vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
	      vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
	    }
	  else
	    {
	      vr.known = false;
	      vr.type = VR_VARYING;
	      vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
	    }
	  ts->m_vr->quick_push (vr);
	}
    }
}
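
/* For illustration (hypothetical source, not from GCC itself): with

     static int clamp (int v) { return v > 15 ? 15 : v; }
     int r (unsigned char c) { return clamp (c & 7); }
     int s (void) { return clamp (3); }

   the value range lattice of V settles on [0, 7], the ipa_vr record pushed
   above carries that range into the transformation summary, and VRP in the
   callee can subsequently fold the v > 15 test away.  */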
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of bits propagation.  */
  ipcp_store_bits_results ();
  /* Store results of value range propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}
/* Initialization and computation of IPCP data structures.  This is the initial
   intraprocedural analysis of functions, which gathers information to be
   propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}
const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries, /*
		      write_optimization_summary */
		      ipcp_read_transformation_summaries, /*
		      read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* FIXME: We should remove the optimize check after we ensure we never
	 run IPA passes when not optimizing.  */
      return (flag_ipa_cp && optimize) || in_lto_p;
    }

  virtual unsigned int execute (function *) { return ipcp_driver (); }

}; // class pass_ipa_cp

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}
/* Reset all state within ipa-cp.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_c_finalize (void)
{
  base_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}