/* Interprocedural constant propagation
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and the size
      cost of the clone is deemed good.
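
   As an illustration (a made-up example), given

     static int mult (int a, int b) { return a * b; }
     int caller (int x) { return mult (x, 7) + mult (x + 1, 7); }

   IPA-CP can discover that the second argument of mult is always the
   constant 7 and either modify mult itself or create a specialized clone
   for b == 7, so that later optimizations can fold the multiplication.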
   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.
   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, possibly with an operation performed on it.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.
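
   For example (an illustrative snippet only), in

     extern int use (int, int, int);
     int foo (int x) { return use (x + 1, 16, rand ()); }

   the call to use gets a pass-through jump function for its first argument
   (parameter x with a PLUS_EXPR 1 operation), a constant jump function for
   the second argument (16) and an unknown jump function for the third.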
   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.
   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original Callahan's paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.
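
   As a small illustration (not taken from real code): if f (int x) contains
   a call g (x) and g (int y) contains a call f (y), a constant discovered
   for f's x and the value it induces for g's y feed into each other, so
   they end up in one SCC of values and their estimated effects are
   accumulated together.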
   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */
#define INCLUDE_ALGORITHM
#include "coretypes.h"
#include "gimple-expr.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "symtab-clones.h"
#include "gimple-range.h"
template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources = nullptr;
  /* Next pointer in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointer in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointer in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */
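
/* For example, a parameter that is known to always be passed either the
   constant 2 or the constant 8 is described by a lattice with two values and
   both flags clear; if on top of that some caller may pass a value that is
   not known, the contains_variable flag is set as well.  */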
template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For example:

     int f (int x)
     {
       return some_op (x);
     }

     int f1 (int y)
     {
       if (cond)
         return f (y & 0xff);
       else
         return f (y & 0xf);
     }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */
class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING }
    m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are
     constant.  If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  Value_Range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const vrange &p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init (tree type);
  void print (FILE * f);

private:
  bool meet_with_1 (const vrange &other_vr);
};

void
ipcp_vr_lattice::init (tree type)
{
  if (type)
    m_vr.set_type (type);

  // Otherwise m_vr will default to unsupported_range.
}
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA-CP aggregate lattices");
/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}
/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}
/* Return the lattice corresponding to the polymorphic context of the Ith
   formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}
/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}
/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, "               ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
/* Print the bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = ");
      print_hex (get_value (), f);
      fprintf (f, ", mask = ");
      print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}
/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}
/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, "    param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, "         ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, "         ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, "        virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, "        AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, "        AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};
/* Initialize fields of STATS to zeroes and optionally set it up so that edges
   from ITSELF are counted separately as self-recursive ones.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->freq_sum = 0;
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->itself = itself;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls++;
      }
  return false;
}
/* Return true if this NODE is viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order (NULL), stack (NULL), nnodes (0), stack_top (0)
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}
/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}
/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}
952 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
954 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
955 if (info
->node_enqueued
)
957 info
->node_enqueued
= 1;
958 topo
->stack
[topo
->stack_top
++] = node
;
/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}
/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}
/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}
/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
/* Meet the current value of the lattice with the lattice OTHER.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}
/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}
/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}
/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}
/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}
/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
                                       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
/* Meet operation, similar to ccp_lattice_meet: xor the old and new values so
   that any bit which differs between them at the same position is dropped to
   varying.  Return true if the mask changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
/* Meet the bits lattice with operand described by <value, mask,
   precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand,
                              bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
                       o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
        {
          adjusted_mask |= adjusted_value;
          adjusted_value &= ~adjusted_mask;
        }
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
                        drop_all_ones);
}
/* Dump the contents of the list to F.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
               av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
        fprintf (f, "(by_ref)");
      if (av.killed)
        fprintf (f, "(killed)");
      comma = true;
    }
  fprintf (f, "\n");
}
/* Dump the contents of the list to stderr.  */

DEBUG_FUNCTION void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}
/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
                        [] (const ipa_argagg_value &elt,
                            const ipa_argagg_value &val)
                        {
                          if (elt.index < val.index)
                            return true;
                          if (elt.index > val.index)
                            return false;
                          if (elt.unit_offset < val.unit_offset)
                            return true;
                          return false;
                        });

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow?  */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
                  || prev_index < av.index
                  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
          && av.unit_offset == unit_offset)
        slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
                        [] (const ipa_argagg_value &elt, unsigned idx)
                        {
                          return elt.index < idx;
                        });
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}
/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
                                  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}
/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
             && (m_elts[j].index < other_index
                 || (m_elts[j].index == other_index
                     && m_elts[j].unit_offset < other_offset)))
        j++;

      if (j >= m_elts.size ()
          || m_elts[j].index != other_index
          || m_elts[j].unit_offset != other_offset
          || m_elts[j].by_ref != other.m_elts[i].by_ref
          || !m_elts[j].value
          || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
        return false;
    }
  return true;
}
/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
                                             unsigned dest_index,
                                             unsigned unit_delta,
                                             vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
        return;
      if (av->index == src_index
          && (av->unit_offset >= unit_delta)
          && av->value)
        {
          ipa_argagg_value new_av;
          gcc_checking_assert (av->value);
          new_av.value = av->value;
          new_av.unit_offset = av->unit_offset - unit_delta;
          new_av.index = dest_index;
          new_av.by_ref = av->by_ref;
          gcc_assert (!av->killed);
          new_av.killed = false;

          /* Quick check that the offsets we push are indeed increasing.  */
          gcc_assert (first
                      || new_av.unit_offset > prev_unit_offset);
          prev_unit_offset = new_av.unit_offset;
          first = false;

          res->safe_push (new_av);
        }
    }
}
/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
                            unsigned unit_delta,
                            vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
        && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
        ipa_argagg_value iav;
        iav.value = aglat->values->value;
        iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
        iav.index = dest_index;
        iav.by_ref = plats->aggs_by_ref;
        iav.killed = false;

        gcc_assert (first
                    || iav.unit_offset > prev_unit_offset);
        prev_unit_offset = iav.unit_offset;
        first = false;

        pushed_sth = true;
        res->safe_push (iav);
      }
  return pushed_sth;
}
/* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
                        const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
        continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
             && (other[j].index < this_index
                 || (other[j].index == this_index
                     && other[j].unit_offset < this_offset)))
        j++;

      if (j >= other.length ())
        {
          elts[i].value = NULL_TREE;
          continue;
        }

      if (other[j].index == this_index
          && other[j].unit_offset == this_offset
          && other[j].by_ref == elts[i].by_ref
          && other[j].value
          && values_equal_for_ipcp_p (other[j].value, elts[i].value))
        valid_entries++;
      else
        elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}
/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}
1587 initialize_node_lattices (struct cgraph_node
*node
)
1589 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1590 struct cgraph_edge
*ie
;
1591 bool disable
= false, variable
= false;
1594 gcc_checking_assert (node
->has_gimple_body_p ());
1596 if (!ipa_get_param_count (info
))
1598 else if (node
->local
)
1600 int caller_count
= 0;
1601 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1603 gcc_checking_assert (caller_count
> 0);
1604 if (caller_count
== 1)
1605 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1610 /* When cloning is allowed, we can assume that externally visible
1611 functions are not called. We will compensate this by cloning
1613 if (ipcp_versionable_function_p (node
)
1614 && ipcp_cloning_candidate_p (node
))
1620 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1621 && !node
->alias
&& !node
->thunk
)
1623 fprintf (dump_file
, "Initializing lattices of %s\n",
1624 node
->dump_name ());
1625 if (disable
|| variable
)
1626 fprintf (dump_file
, " Marking all lattices as %s\n",
1627 disable
? "BOTTOM" : "VARIABLE");
1630 auto_vec
<bool, 16> surviving_params
;
1631 bool pre_modified
= false;
1633 clone_info
*cinfo
= clone_info::get (node
);
1635 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1637 /* At the moment all IPA optimizations should use the number of
1638 parameters of the prevailing decl as the m_always_copy_start.
1639 Handling any other value would complicate the code below, so for the
1640 time bing let's only assert it is so. */
1641 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1642 == ipa_get_param_count (info
))
1643 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1645 pre_modified
= true;
1646 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1648 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1649 && !node
->alias
&& !node
->thunk
)
1652 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1654 if (j
< (int) surviving_params
.length ()
1655 && surviving_params
[j
])
1660 " The following parameters are dead on arrival:");
1663 fprintf (dump_file
, " %u", j
);
1666 fprintf (dump_file
, "\n");
1670 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1672 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1673 tree type
= ipa_get_type (info
, i
);
1675 || !ipa_get_type (info
, i
)
1676 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1677 || !surviving_params
[i
])))
1679 plats
->itself
.set_to_bottom ();
1680 plats
->ctxlat
.set_to_bottom ();
1681 set_agg_lats_to_bottom (plats
);
1682 plats
->bits_lattice
.set_to_bottom ();
1683 plats
->m_value_range
.init (type
);
1684 plats
->m_value_range
.set_to_bottom ();
1688 plats
->m_value_range
.init (type
);
1690 set_all_contains_variable (plats
);
1694 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1695 if (ie
->indirect_info
->polymorphic
1696 && ie
->indirect_info
->param_index
>= 0)
1698 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1699 ipa_get_parm_lattices (info
,
1700 ie
->indirect_info
->param_index
)->virt_call
= 1;
/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the 2nd operand for a binary operation.  RES_TYPE
   is the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
           && zerop (input))
    return input;
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to a single known polymorphic context,
   given that INFO describes the caller node or the one it is inlined to, CS is
   the call graph edge corresponding to JFUNC and CSIDX the index of the
   described parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}
/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range that is not VARYING nor UNDEFINED.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
                                   const vrange &src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
    return false;

  range_op_handler handler (operation);
  if (!handler)
    return false;

  Value_Range varying (dst_type);
  varying.set_varying (dst_type);

  return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
          && !dst_vr.varying_p ()
          && !dst_vr.undefined_p ());
}

/* Same as above, but the SRC_VR argument is an IPA_VR which must
   first be extracted into a vrange.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
                                   const ipa_vr &src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  Value_Range tmp;
  src_vr.get_vrange (tmp);
  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
                                            dst_type, src_type);
}
/* Determine range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE is the type of the parameter.  */

static void
ipa_value_range_from_jfunc (vrange &vr,
                            ipa_node_params *info, cgraph_edge *cs,
                            ipa_jump_func *jfunc, tree parm_type)
{
  vr.set_undefined ();

  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (vr,
                                       *jfunc->m_vr,
                                       NOP_EXPR, parm_type,
                                       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return;

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
        = ipcp_get_transformation_summary (cs->caller->inlined_to
                                           ? cs->caller->inlined_to
                                           : cs->caller);
      if (!sum || !sum->m_vr)
        return;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known_p ())
        return;

      tree vr_type = ipa_get_type (info, idx);
      Value_Range srcvr;
      (*sum->m_vr)[idx].get_vrange (srcvr);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
        {
          Value_Range res (vr_type);

          if (ipa_vr_operation_and_type_effects (res,
                                                 srcvr,
                                                 operation, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
      else
        {
          Value_Range op_res (vr_type);
          Value_Range res (vr_type);
          tree op = ipa_get_jf_pass_through_operand (jfunc);
          Value_Range op_vr (vr_type);
          range_op_handler handler (operation);

          ipa_range_set_and_normalize (op_vr, op);

          if (!handler
              || !op_res.supports_type_p (vr_type)
              || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
            op_res.set_varying (vr_type);

          if (ipa_vr_operation_and_type_effects (res,
                                                 op_res,
                                                 NOP_EXPR, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
    }
}
/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
                          const ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0
      || item->jftype == IPA_JF_UNKNOWN
      || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
                       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
        value = info->known_csts[src_idx];
      else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
        {
          ipa_argagg_value_list avl (ts);
          value = avl.get_value (src_idx,
                                 item->value.load_agg.offset / BITS_PER_UNIT,
                                 item->value.load_agg.by_ref);
        }
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
        = ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
        {
          struct ipcp_lattice<tree> *lat = &src_plats->itself;

          if (!lat->is_single_const ())
            return NULL_TREE;

          value = lat->values->value;
        }
      else if (src_plats->aggs
               && !src_plats->aggs_bottom
               && !src_plats->aggs_contain_variable
               && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
        {
          struct ipcp_agg_lattice *aglat;

          for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
            {
              if (aglat->offset > item->value.load_agg.offset)
                break;

              if (aglat->offset == item->value.load_agg.offset)
                {
                  if (aglat->is_single_const ())
                    value = aglat->values->value;
                  break;
                }
            }
        }
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
        return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
                                  value,
                                  item->value.pass_through.operand,
                                  item->type);
}
/* Process all items in AGG_JFUNC relative to caller (or the node the original
   caller is inlined to) NODE, which is described by INFO, and push the results
   to RES as describing values passed in parameter DST_INDEX.  */
void
ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
				ipa_agg_jump_function *agg_jfunc,
				unsigned dst_index,
				vec<ipa_argagg_value> *res)
{
  unsigned prev_unit_offset = 0;
  bool first = true;

  for (const ipa_agg_jf_item &item : agg_jfunc->items)
    {
      tree value = ipa_agg_value_from_jfunc (info, node, &item);
      if (!value)
	continue;

      ipa_argagg_value iav;
      iav.value = value;
      iav.unit_offset = item.offset / BITS_PER_UNIT;
      iav.index = dst_index;
      iav.by_ref = agg_jfunc->by_ref;
      iav.killed = false;

      gcc_assert (first
		  || iav.unit_offset > prev_unit_offset);
      prev_unit_offset = iav.unit_offset;
      first = false;

      res->safe_push (iav);
    }
}
/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

static void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
	  || !opt_for_fn (node->decl, optimize))
	continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}
/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset within an
   aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;
  src->next = sources;
  sources = src;
}
/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   CST and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = cst;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}
/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
			      unsigned same_lat_gen_level)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = ctx;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records the address of the existing or newly
   added value.

   If the value is generated for a self-recursive call as a result of an
   arithmetic pass-through jump-function acting on a value in the same lattice,
   SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
   zero.  If it is non-zero, the PARAM_IPA_CP_VALUE_LIST_SIZE limit is
   ignored.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
				  int src_idx, HOST_WIDE_INT offset,
				  ipcp_value<valtype> **val_p,
				  unsigned same_lat_gen_level)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
	if (val_p)
	  *val_p = val;

	if (val->self_recursion_generated_level < same_lat_gen_level)
	  val->self_recursion_generated_level = same_lat_gen_level;

	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source<valtype> *s;
	    for (s = val->sources; s; s = s->next)
	      if (s->cs == cs && s->val == src_val)
		break;
	    if (s)
	      return false;
	  }

	val->add_source (cs, src_val, src_idx, offset);
	return false;
      }

  if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
						param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
	while (val->sources)
	  {
	    ipcp_value_source<valtype> *src = val->sources;
	    val->sources = src->next;
	    ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
	  }
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to the end of the value list, which can reduce
     iterations of the propagation stage for recursive functions.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}
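/* Editorial note (illustrative, numbers hypothetical): with the default
   --param ipa-cp-value-list-size=8, a ninth distinct constant seen for the
   same parameter does not extend the list; instead all sources are freed and
   the lattice drops to bottom, since tracking arbitrarily many values per
   parameter would make propagation and the later cloning decisions too
   expensive.  Values generated for self-recursion (SAME_LAT_GEN_LEVEL != 0)
   bypass this limit.  */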
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL.  If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL.  If the operation is binary, OPND2 is a
   constant value acting as its second operand.  If non-NULL, RES_TYPE is the
   expected type of the result.  */

static tree
get_val_across_arith_op (enum tree_code opcode,
			 tree opnd1_type,
			 tree opnd2,
			 ipcp_value<tree> *src_val,
			 tree res_type)
{
  tree opnd1 = src_val->value;

  /* Skip source values that are incompatible with the specified type.  */
  if (opnd1_type
      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
    return NULL_TREE;

  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is the expected type for the values in
   SRC_LAT.  OPND2 is a constant value if the transformation is a binary
   operation.  SRC_OFFSET specifies the offset in an aggregate if SRC_LAT
   describes a lattice of a part of an aggregate.  SRC_IDX is the index of the
   source parameter.  RES_TYPE is the value type of the result being
   propagated into.  Return true if DEST_LAT changed.  */

static bool
propagate_vals_across_arith_jfunc (cgraph_edge *cs,
				   enum tree_code opcode,
				   tree opnd1_type,
				   tree opnd2,
				   ipcp_lattice<tree> *src_lat,
				   ipcp_lattice<tree> *dest_lat,
				   HOST_WIDE_INT src_offset,
				   int src_idx,
				   tree res_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* Due to circular dependencies, propagating within an SCC through arithmetic
     transformation would create an infinite number of values.  But for
     self-feeding recursive functions, we can allow propagation in a limited
     count, and this can enable a simple kind of recursive function versioning.
     For other scenarios, we just make the lattices bottom.  */
  if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
    {
      int max_recursive_depth = opt_for_fn(cs->caller->decl,
					   param_ipa_cp_max_recursive_depth);
      if (src_lat != dest_lat || max_recursive_depth < 1)
	return dest_lat->set_contains_variable ();

      /* No benefit if recursive execution is in low probability.  */
      if (cs->sreal_frequency () * 100
	  <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
				       param_ipa_cp_min_recursive_probability))
	return dest_lat->set_contains_variable ();

      auto_vec<ipcp_value<tree> *, 8> val_seeds;

      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  /* Now we do not use self-recursively generated values as propagation
	     sources, which is absolutely conservative, but can avoid explosion
	     of a lattice's value space, especially when one recursive function
	     calls another recursive one.  */
	  if (src_val->self_recursion_generated_p ())
	    {
	      ipcp_value_source<tree> *s;

	      /* If the lattice has already been propagated for the call site,
		 no need to do that again.  */
	      for (s = src_val->sources; s; s = s->next)
		if (s->cs == cs)
		  return dest_lat->set_contains_variable ();
	    }
	  else
	    val_seeds.safe_push (src_val);
	}

      gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);

      unsigned i;
      /* Recursively generate lattice values with a limited count.  */
      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
	{
	  for (int j = 1; j < max_recursive_depth; j++)
	    {
	      tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
						     src_val, res_type);
	      if (!cstval
		  || !ipacp_value_safe_for_type (res_type, cstval))
		break;

	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
					  src_offset, &src_val, j);
	      gcc_checking_assert (src_val);
	    }
	}
      ret |= dest_lat->set_contains_variable ();
    }
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
	/* Now we do not use self-recursively generated values as propagation
	   sources, otherwise it is easy to make the value space of a normal
	   lattice overflow.  */
	if (src_val->self_recursion_generated_p ())
	  {
	    ret |= dest_lat->set_contains_variable ();
	    continue;
	  }

	tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
					       src_val, res_type);
	if (cstval
	    && ipacp_value_safe_for_type (res_type, cstval))
	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
				      src_offset);
	else
	  ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}
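/* A sketch of the recursive versioning enabled above (hypothetical source,
   default --param ipa-cp-max-recursive-depth=8):

     void f (int depth)
     {
       ...
       f (depth + 1);	// self-recursive PLUS_EXPR pass-through
     }
     ... f (1);		// seed constant from a non-recursive caller

   Starting from the seed value 1, the loop above generates 2, 3, ... up to
   the maximum recursive depth within the same lattice, which later lets the
   cloning phase create a chain of specialized clones for the first few
   recursion levels.  */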
/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
				    ipcp_lattice<tree> *src_lat,
				    ipcp_lattice<tree> *dest_lat, int src_idx,
				    tree parm_type)
{
  return propagate_vals_across_arith_jfunc (cs,
				ipa_get_jf_pass_through_operation (jfunc),
				NULL_TREE,
				ipa_get_jf_pass_through_operand (jfunc),
				src_lat, dest_lat, -1, src_idx, parm_type);
}
/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  */

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx,
				tree param_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t && ipacp_value_safe_for_type (param_type, t))
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARAM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (ipacp_value_safe_for_type (param_type, val))
	return dest_lat->add_value (val, cs, NULL, 0);
      else
	return dest_lat->set_contains_variable ();
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx, param_type);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use it for indirect inlining), we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}
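/* Illustration (hypothetical): in

     void caller (int x) { callee (x); }

   the argument is a plain NOP_EXPR pass-through, so every constant in the
   lattice of CALLER's X is copied by the pass-through propagation above into
   the lattice of CALLEE's parameter, and the destination additionally becomes
   variable whenever the source lattice is.  */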
/* Propagate polymorphic call contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX, and put the values into
   DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
			  ipa_jump_func *jfunc, int idx,
			  ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
{
  if (dest_lat->bottom)
    return false;
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}
    }

 prop_fail:
  if (!added_sth)
    {
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate bits across JFUNC that is associated with edge CS and update
   DEST_LATTICE accordingly.  */

bool
propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
				     ipa_jump_func *jfunc,
				     ipcp_bits_lattice *dest_lattice)
{
  if (dest_lattice->bottom_p ())
    return false;

  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
  tree parm_type = ipa_get_type (callee_info, idx);

  /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
     transform for these cases.  Similarly, we can have bad type mismatches
     with LTO, avoid doing anything with those too.  */
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;
      bool keep_null = false;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);

      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example, consider:

	   int f (int x)
	   {
	     g (x & 0xff);
	   }

	 Assume the lattice for x is bottom; we can still propagate the
	 result of x & 0xff, which gets computed during the ccp1 pass
	 and which we store in the jump function during the analysis
	 stage.  */

      if (!src_lats->bits_lattice.bottom_p ())
	{
	  bool drop_all_ones
	    = keep_null && !src_lats->bits_lattice.known_nonzero_p ();

	  return dest_lattice->meet_with (src_lats->bits_lattice, precision,
					  sgn, code, operand, drop_all_ones);
	}
    }

  Value_Range vr (parm_type);
  if (jfunc->m_vr)
    {
      jfunc->m_vr->get_vrange (vr);
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  irange &r = as_a <irange> (vr);
	  irange_bitmask bm = r.get_bitmask ();
	  widest_int mask
	    = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
	  widest_int value
	    = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
	  return dest_lattice->meet_with (value, mask, precision);
	}
    }
  return dest_lattice->set_to_bottom ();
}
/* Propagate value range across jump function JFUNC that is associated with
   edge CS whose callee parameter has type PARAM_TYPE, and update DEST_PLATS
   accordingly.  */

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ())
	return dest_lat->set_to_bottom ();

      Value_Range vr (operand_type);
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	ipa_vr_operation_and_type_effects (vr,
					   src_lats->m_value_range.m_vr,
					   operation, param_type,
					   operand_type);
      /* A crude way to prevent unbounded number of value range updates
	 in SCC components.  We should allow a limited number of updates
	 within an SCC, too.  */
      else if (!ipa_edge_within_scc (cs))
	{
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  Value_Range op_vr (TREE_TYPE (op));
	  Value_Range op_res (operand_type);
	  range_op_handler handler (operation);

	  ipa_range_set_and_normalize (op_vr, op);

	  if (!handler
	      || !op_res.supports_type_p (operand_type)
	      || !handler.fold_range (op_res, operand_type,
				      src_lats->m_value_range.m_vr, op_vr))
	    op_res.set_varying (operand_type);

	  ipa_vr_operation_and_type_effects (vr,
					     op_res,
					     NOP_EXPR, param_type,
					     operand_type);
	}
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  if (jfunc->m_vr)
	    {
	      Value_Range jvr (param_type);
	      if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
						     NOP_EXPR,
						     param_type,
						     jfunc->m_vr->type ()))
		vr.intersect (jvr);
	    }
	  return dest_lat->meet_with (vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  Value_Range tmpvr (val, val);
	  return dest_lat->meet_with (tmpvr);
	}
    }

  Value_Range vr (param_type);
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (vr);
  else
    return dest_lat->set_to_bottom ();
}
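/* Illustration (hypothetical ranges): if the caller's lattice records the
   range [0, 10] for X and the argument is the pass-through X + 1, the
   range_op_handler for PLUS_EXPR folds the source range with the singleton
   range of the constant operand, yielding [1, 11], which is then converted
   to the type of the callee's parameter and met with the destination
   lattice.  */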
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}
/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.
   If there are overlaps, turn the whole DEST_PLATS to bottom and return false.
   If any skipped lattices were newly marked as containing variable, set
   *CHANGE to true.  MAX_AGG_ITEMS is the maximum number of lattices.  */

static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
/* Set AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}
/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
/* Determine whether there is anything to propagate FROM SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}
/* Propagate values through ITEM, a jump function for a part of an aggregate,
   into the corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */

static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = ipa_node_params_sum->get (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	  return ret;
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
      return ret;
    }

  if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
/* Return true if, on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk;
}
/* Propagate constants from the caller to the callee of CS.  Return true if
   any lattice of the callee changed.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = ipa_node_params_sum->get (callee);
  if (!callee_info)
    return false;

  args = ipa_edge_args_sum->get (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
/* If an indirect edge IE can be turned into a direct one based on the known
   constants in KNOWN_CSTS, known contexts in KNOWN_CONTEXTS and known
   aggregate values in AVS, return the destination.  Store into *SPECULATIVE
   a boolean determining whether the discovered target is only a speculative
   guess.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				const vec<tree> &known_csts,
				const vec<ipa_polymorphic_call_context>
				  &known_contexts,
				const ipa_argagg_value_list &avs,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  if (param_index == -1)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      tree t = NULL;

      if (ie->indirect_info->agg_contents)
	{
	  if ((unsigned) param_index < known_csts.length ()
	      && known_csts[param_index])
	    t = ipa_find_agg_cst_from_init (known_csts[param_index],
					    ie->indirect_info->offset,
					    ie->indirect_info->by_ref);

	  if (!t && ie->indirect_info->guaranteed_unmodified)
	    t = avs.get_value (param_index,
			       ie->indirect_info->offset / BITS_PER_UNIT,
			       ie->indirect_info->by_ref);
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  gcc_assert (!ie->indirect_info->by_ref);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  if ((unsigned) param_index < known_csts.length ()
      && known_csts[param_index])
    t = ipa_find_agg_cst_from_init (known_csts[param_index],
				    ie->indirect_info->offset, true);

  /* Try to work out the value of the virtual table pointer in replacements
     or known aggregate values.  */
  if (!t)
    t = avs.get_value (param_index,
		       ie->indirect_info->offset / BITS_PER_UNIT,
		       true);

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target
	    = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						 vtable, offset, &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of the pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean
   determining whether the discovered target is only a speculative guess.  */

static tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_call_arg_values *avals,
			      bool *speculative)
{
  ipa_argagg_value_list avl (avals);
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avl, speculative);
}
/* Calculate devirtualization time bonus for NODE, assuming we know information
   about arguments stored in AVALS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      ipa_argagg_value_list avl (avals);
      target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					       avals->m_known_contexts,
					       avl, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}
/* Return time bonus incurred because of hints stored in ESTIMATES.  */

static int
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
  int result = 0;
  ipa_hints hints = estimates.hints;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  if (hints & INLINE_HINT_loop_iterations)
    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();

  if (hints & INLINE_HINT_loop_stride)
    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();

  return result;
}
/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline sreal
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       sreal evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
      / 100;

  return evaluation;
}
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQ_SUM.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
			    sreal freq_sum, profile_count count_sum,
			    int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (count_sum.nonzero_p ())
    {
      gcc_assert (base_count.nonzero_p ());
      sreal factor = count_sum.probability_in (base_count).to_sreal ();
      sreal evaluation = (time_benefit * factor) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
		   "size: %i, count_sum: ", time_benefit.to_double (),
		   size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
		   info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation.to_double (), eval_threshold);
	}

      return evaluation.to_int () >= eval_threshold;
    }
  else
    {
      sreal evaluation = (time_benefit * freq_sum) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
		 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
		 "threshold: %i\n",
		 time_benefit.to_double (), size_cost, freq_sum.to_double (),
		 info->node_within_scc
		 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation.to_double (), eval_threshold);

      return evaluation.to_int () >= eval_threshold;
    }
}
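/* Worked example (all numbers made up): with TIME_BENEFIT == 100,
   SIZE_COST == 16 and FREQ_SUM == 4, the base evaluation above is
   100 * 4 / 16 == 25 before the final scaling; for a node in a non-self SCC
   with the default --param ipa-cp-recursion-penalty=40, only 60% of that
   survives, i.e. 15, and the scaled result is then compared against
   --param ipa-cp-eval-threshold.  */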
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
				   ipa_auto_call_arg_values *avals,
				   bool calculate_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals->m_known_vals[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account known context as reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
	ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement cost of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
			       ipa_auto_call_arg_values *avals,
			       int removable_params_cost, int est_move_cost,
			       ipcp_value_base *val)
{
  sreal time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = (estimates.nonspecialized_time - estimates.time)
      + (devirtualization_time_bonus (node, avals)
	 + hint_time_bonus (node, estimates)
	 + removable_params_cost + est_move_cost);

  int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
/* Get the overall limit of growth based on parameters extracted from growth.
   It does not really make sense to mix functions with different overall growth
   limits but it is possible, and if it happens, we do not want to select one
   limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}
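/* For example (made-up numbers): with orig_overall_size == 20000,
   --param ipa-cp-large-unit-insns=16000 and --param ipa-cp-unit-growth=10,
   the base stays at 20000 since it already exceeds the large-unit limit, and
   the returned cap is 20000 + 20000 * 10 / 100 + 1 == 22001.  */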
/* Return true if NODE should be cloned just for a parameter removal, possibly
   dumping a reason if not.  */

static bool
clone_for_param_removal_p (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Not considering cloning to remove parameters, "
		 "function cannot change signature.\n");
      return false;
    }
  if (node->can_be_local_p ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Not considering cloning to remove parameters, "
		 "IPA-SRA can do it potentially better.\n");
      return false;
    }
  return true;
}
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  if (always_const || devirt_bonus
      || (removable_params_cost && clone_for_param_removal_p (node)))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, " Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
					   stats.count_sum, size))
	{
	  if (size + overall_size <= get_max_overall_size (node))
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, " Decided to specialize for all "
			 "known contexts, growth (to %li) deemed "
			 "beneficial.\n", overall_size);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Not cloning for all contexts because "
		     "maximum unit size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");
    }

  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_vals[i] = NULL_TREE;
    }

  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  unsigned all_ctx_len = avals.m_known_aggs.length ();
  auto_vec<ipa_argagg_value, 32> all_ctx;
  all_ctx.reserve_exact (all_ctx_len);
  all_ctx.splice (avals.m_known_aggs);
  avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);

  unsigned j = 0;
  for (int index = 0; index < count; index++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is already part of all
		 context estimations.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
	  while (j < all_ctx_len
		 && (all_ctx[j].index < index
		     || (all_ctx[j].index == index
			 && all_ctx[j].unit_offset < unit_offset)))
	    {
	      avals.m_known_aggs[j] = all_ctx[j];
	      j++;
	    }

	  for (unsigned k = j; k < all_ctx_len; k++)
	    avals.m_known_aggs[k+1] = all_ctx[k];

	  for (val = aglat->values; val; val = val->next)
	    {
	      avals.m_known_aggs[j].value = val->value;
	      avals.m_known_aggs[j].unit_offset = unit_offset;
	      avals.m_known_aggs[j].index = index;
	      avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
	      avals.m_known_aggs[j].killed = false;

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, index);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %g, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit.to_double (),
			   val->local_size_cost);
		}
	    }
	}
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;
	  v->scc_no = cur_val->dfs;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
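/* The above is Tarjan's SCC algorithm applied to the value dependence graph:
   SOURCES play the role of edges and each completed component is prepended to
   VALUES_TOPO.  For illustration (hypothetical graph), with values
   A -> B -> C -> A forming a dependence cycle and D depending on C, {A, B, C}
   is emitted as one SCC and D as another, so walking VALUES_TOPO visits
   components in a topological order of their dependencies.  */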
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    if (opt_for_fn (v->decl, flag_ipa_cp)
		&& opt_for_fn (v->decl, optimize))
	      push_node_to_stack (topo, v);
	    /* When V is not optimized, we cannot push it to stack, but
	       still we need to set all its callees lattices to bottom.  */
	    else
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		propagate_constants_across_call (cs);
	  }

      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;
	  class ipa_node_params *info = NULL;
	  bool self_scc = true;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		cgraph_node *callee = cs->callee->function_symbol ();

		if (v != callee)
		  self_scc = false;

		if (!info)
		  {
		    info = ipa_node_params_sum->get (v);
		    info->node_within_scc = true;
		  }

		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, callee);
	      }

	  if (info)
	    info->node_is_self_scc = self_scc;

	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ()
	    && opt_for_fn (v->decl, flag_ipa_cp)
	    && opt_for_fn (v->decl, optimize))
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}
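/* To illustrate what the topological order achieves, consider a hypothetical
   input (not from GCC sources):

     static int add (int a, int b) { return a + b; }
     static int wrap (int x) { return add (x, 1); }
     int entry (void) { return wrap (7); }

   Nodes are visited callers first, so x == 7 is established for wrap before
   the edge wrap -> add is examined, and a == 7, b == 1 can be recorded for
   add in the same pass.  Only edges inside a strongly connected component
   need the propagate-until-stable loop above.  */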
/* Propagate the estimated effects of individual values along the topological
   order from the dependent values to those they depend on.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      for (val = base; val; val = val->scc_next)
	{
	  time = time + val->local_time_benefit + val->prop_time_benefit;
	  size = size + val->local_size_cost + val->prop_size_cost;
	}

      for (val = base; val; val = val->scc_next)
	{
	  processed_srcvals.empty ();
	  for (src = val->sources; src; src = src->next)
	    if (src->val
		&& src->cs->maybe_hot_p ())
	      {
		if (!processed_srcvals.add (src->val))
		  {
		    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
		    if (prop_size < INT_MAX)
		      src->val->prop_size_cost = prop_size;
		    else
		      continue;
		  }

		int special_factor = 1;
		if (val->same_scc (src->val))
		  special_factor
		    = opt_for_fn (src->cs->caller->decl,
				  param_ipa_cp_recursive_freq_factor);
		else if (val->self_recursion_generated_p ()
			 && (src->cs->callee->function_symbol ()
			     == src->cs->caller))
		  {
		    int max_recur_gen_depth
		      = opt_for_fn (src->cs->caller->decl,
				    param_ipa_cp_max_recursive_depth);
		    special_factor = max_recur_gen_depth
		      - val->self_recursion_generated_level + 1;
		  }

		src->val->prop_time_benefit
		  += time * special_factor * src->cs->sreal_frequency ();
	      }

	  if (size < INT_MAX)
	    {
	      val->prop_time_benefit = time;
	      val->prop_size_cost = size;
	    }
	  else
	    {
	      val->prop_time_benefit = 0;
	      val->prop_size_cost = 0;
	    }
	}
    }
}
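/* A worked example of the propagation above, with made-up numbers: if an SCC
   consists of a single value with local_time_benefit 10 and
   prop_time_benefit 4, then time is 14, and a source value feeding it over
   an edge with sreal_frequency 2 and special_factor 1 receives
   prop_time_benefit += 14 * 1 * 2 = 28, while its prop_size_cost grows by
   the summed size of the SCC unless that would overflow INT_MAX.  */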
/* Callback for qsort to sort counts of all edges in descending order.  */

static int
compare_edge_profile_counts (const void *a, const void *b)
{
  const profile_count *cnt1 = (const profile_count *) a;
  const profile_count *cnt2 = (const profile_count *) b;

  if (*cnt1 < *cnt2)
    return 1;
  if (*cnt1 > *cnt2)
    return -1;
  return 0;
}
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  base_count = profile_count::uninitialized ();

  bool compute_count_base = false;
  unsigned base_count_pos_percent = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    if (node->has_gimple_body_p ()
	&& opt_for_fn (node->decl, flag_ipa_cp)
	&& opt_for_fn (node->decl, optimize))
      {
	ipa_node_params *info = ipa_node_params_sum->get (node);
	determine_versionability (node, info);

	unsigned nlattices = ipa_get_param_count (info);
	void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
	info->lattices = new (chunk) ipcp_param_lattices[nlattices];
	initialize_node_lattices (node);
      }
    ipa_size_summary *s = ipa_size_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    if (node->count.ipa ().initialized_p ())
      {
	compute_count_base = true;
	unsigned pos_percent = opt_for_fn (node->decl,
					   param_ipa_cp_profile_count_base);
	base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
      }
  }

  if (compute_count_base)
    {
      auto_vec<profile_count> all_edge_counts;
      all_edge_counts.reserve_exact (symtab->edges_count);
      FOR_EACH_DEFINED_FUNCTION (node)
	for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	  {
	    profile_count count = cs->count.ipa ();
	    if (!count.nonzero_p ())
	      continue;

	    enum availability avail;
	    cgraph_node *tgt
	      = cs->callee->function_or_virtual_thunk_symbol (&avail);
	    ipa_node_params *info = ipa_node_params_sum->get (tgt);
	    if (info && info->versionable)
	      all_edge_counts.quick_push (count);
	  }

      if (!all_edge_counts.is_empty ())
	{
	  gcc_assert (base_count_pos_percent <= 100);
	  all_edge_counts.qsort (compare_edge_profile_counts);

	  unsigned base_count_pos
	    = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
	  base_count = all_edge_counts[base_count_pos];

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nSelected base_count from %u edges at "
		       "position %u, arriving at: ", all_edge_counts.length (),
		       base_count_pos);
	      base_count.dump (dump_file);
	      fprintf (dump_file, "\n");
	    }
	}
      else if (dump_file)
	fprintf (dump_file, "\nNo candidates with non-zero call count found, "
		 "continuing as if without profile feedback.\n");
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
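/* Example of the base_count selection arithmetic above (hypothetical
   numbers): with 200 eligible edges and param_ipa_cp_profile_count_base set
   to 10, the counts are sorted in descending order by
   compare_edge_profile_counts and base_count_pos = (200 * 10) / 100 = 20,
   i.e. base_count becomes the count of the 21st hottest edge, against which
   later heuristics compare candidate counts.  */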
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				vec<ipa_argagg_value, va_gc> *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      next_ie = ie->next_callee;
      ipa_argagg_value_list avs (aggvals);
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       avs, &speculative);
      if (target)
	{
	  bool agg_contents = ie->indirect_info->agg_contents;
	  bool polymorphic = ie->indirect_info->polymorphic;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  if (cs && !agg_contents && !polymorphic)
	    {
	      ipa_node_params *info = ipa_node_params_sum->get (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE
		  && !ipa_get_param_load_dereferenced (info, param_index))
		{
		  struct ipa_ref *to_del;

		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "     controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0,
							 IPA_REF_ADDR)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "       and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
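/* For illustration (hypothetical code, not from GCC sources): in

     static int apply (int (*cb) (int), int v) { return cb (v); }

   a clone of apply created for the known constant cb == foo has no reason to
   keep the call indirect; ipa_get_indirect_edge_target_1 resolves the target
   to foo and ipa_make_edge_direct_to_target makes the edge direct, which may
   later allow inlining foo.  The controlled-uses bookkeeping above can then
   also remove the address-taken reference to foo created by cloning.  */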
class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  */

class edge_clone_summary
{
public:
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};
class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    m_initialize_when_cloning = true;
  }

  void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
		  edge_clone_summary *src_data,
		  edge_clone_summary *dst_data) final override;
};

/* Edge duplication hook.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
				 edge_clone_summary *src_data,
				 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}
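/* A sketch of the list manipulation done by the duplication hook: if the
   chain of clones of an edge E was E -> C1 and E is cloned again into C2,
   the hook runs with src_edge == E and dst_edge == C2 and splices C2 right
   after E, yielding E -> C2 -> C1.  The destructor of edge_clone_summary
   unlinks an edge from this doubly-linked list again.  */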
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */

static bool
calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
					     bool allow_recursion_to_clone)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);

  if (availability <= AVAIL_INTERPOSABLE)
    return false;
  if (callee == dest)
    return true;
  if (!allow_recursion_to_clone && cs->caller == callee)
    return false;

  ipa_node_params *info = ipa_node_params_sum->get (callee);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      tree t = NULL_TREE;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else if (ipcp_transformation *ts
	       = ipcp_get_transformation_summary (cs->caller))
	{
	  ipa_argagg_value_list avl (ts);
	  t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
	}
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
	return true;

      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}
/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
			    ipcp_value_source<ipa_polymorphic_call_context> *src,
			    cgraph_node *dest,
			    ipcp_value<ipa_polymorphic_call_context> *)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
				  caller_info->known_contexts[src->index]);

  class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
							    src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
				plats->ctxlat.values->value);
}
/* Get the next clone in the linked list of clones of an edge.  */

static inline struct cgraph_edge *
get_next_cgraph_edge_clone (struct cgraph_edge *cs)
{
  edge_clone_summary *s = edge_clone_summaries->get (cs);
  return s != NULL ? s->next_clone : NULL;
}
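/* The accessor above is typically used to walk an edge together with all of
   its clones, as in the following sketch (process_edge standing in for
   arbitrary, hypothetical per-edge work):

     for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
       process_edge (e);

   which is the pattern used by the cgraph_edge_brings_value_p loops
   below.  */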
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequency and their number and cumulative profile
   counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
   *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */

template <typename valtype>
static bool
get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
				sreal *freq_sum, int *caller_count,
				profile_count *rec_count_sum,
				profile_count *nonrec_count_sum)
{
  ipcp_value_source<valtype> *src;
  sreal freq = 0;
  int count = 0;
  profile_count rec_cnt = profile_count::zero ();
  profile_count nonrec_cnt = profile_count::zero ();
  bool hot = false;
  bool non_self_recursive = false;

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    {
	      count++;
	      freq += cs->sreal_frequency ();
	      hot |= cs->maybe_hot_p ();
	      if (cs->caller != dest)
		{
		  non_self_recursive = true;
		  if (cs->count.ipa ().initialized_p ())
		    nonrec_cnt += cs->count.ipa ();
		}
	      else if (cs->count.ipa ().initialized_p ())
		rec_cnt += cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  /* If the only edges bringing a value are self-recursive ones, do not bother
     evaluating it.  */
  if (!non_self_recursive)
    return false;

  *freq_sum = freq;
  *caller_count = count;
  *rec_count_sum = rec_cnt;
  *nonrec_count_sum = nonrec_cnt;

  if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
    {
      struct cgraph_edge *cs;

      /* Cold non-SCC source edge could trigger hot recursive execution of
	 function.  Consider the case as hot and rely on following cost model
	 computation to further select right one.  */
      for (cs = dest->callers; cs; cs = cs->next_caller)
	if (cs->caller == dest && cs->maybe_hot_p ())
	  return true;
    }

  return hot;
}
/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
   to let a non-self-recursive caller be the first element.  Thus, we can
   simplify intersecting operations on values that arrive from all of these
   callers, especially when there exists self-recursive call.  Return true if
   this kind of adjustment is possible.  */

static bool
adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
				       cgraph_node *node)
{
  for (unsigned i = 0; i < callers.length (); i++)
    {
      cgraph_edge *cs = callers[i];

      if (cs->caller != node)
	{
	  if (i > 0)
	    {
	      callers[i] = callers[0];
	      callers[0] = cs;
	    }
	  return true;
	}
    }
  return false;
}
/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
   is assumed their number is known and equal to CALLER_COUNT.  */

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
			int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    ret.quick_push (cs);
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (caller_count > 1)
    adjust_callers_for_value_intersection (ret, dest);

  return ret;
}
/* Construct a replacement map for a known VALUE for a formal parameter
   PARM_NUM.  Return it or NULL if for some reason it cannot be created.
   FORCE_LOAD_REF should be set to true when the reference created for the
   constant should be a load one and not an address one because the
   corresponding parameter p is only used as *p.  */

static struct ipa_replace_map *
get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
		     bool force_load_ref)
{
  struct ipa_replace_map *replace_map;

  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, "    replacing ");
      ipa_dump_param (dump_file, info, parm_num);

      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);

      if (force_load_ref)
	fprintf (dump_file, " - forcing load reference\n");
      else
	fprintf (dump_file, "\n");
    }
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  replace_map->force_load_ref = force_load_ref;
  return replace_map;
}
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
   one, otherwise it will be referred to as the original node.  */

static void
dump_profile_updates (cgraph_node *node, bool spec)
{
  if (spec)
    fprintf (dump_file, "     setting count of the specialized node %s to ",
	     node->dump_name ());
  else
    fprintf (dump_file, "     setting count of the original node %s to ",
	     node->dump_name ());

  node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "       edge to %s has count ",
	       cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}
/* With partial train run we do not want to assume that original's count is
   zero whenever we redirect all executed edges to a clone.  Simply drop the
   profile to local one in this case.  In any case, return the new value.
   ORIG_NODE is the original node and its count has not been updated yet.  */

static profile_count
lenient_count_portion_handling (profile_count remainder,
				cgraph_node *orig_node)
{
  if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
    remainder = remainder.guessed_local ();

  return remainder;
}
/* Structure to sum counts coming from nodes other than the original node and
   its clones.  */

struct gather_other_count_struct
{
  cgraph_node *orig;
  profile_count other_count;
};

/* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
   counts that come from non-self-recursive calls.  */

static bool
gather_count_of_non_rec_edges (cgraph_node *node, void *data)
{
  gather_other_count_struct *desc = (gather_other_count_struct *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
      desc->other_count += cs->count.ipa ();
  return false;
}
/* Structure to help analyze if we need to boost counts of some clones of some
   non-recursive edges to match the new callee count.  */

struct desc_incoming_count_struct
{
  cgraph_node *orig;
  hash_set <cgraph_edge *> *processed_edges;
  profile_count count;
  unsigned unproc_orig_rec_edges;
};
/* Go over edges calling NODE and its thunks and gather information about
   incoming counts so that we know if we need to make any adjustments.  */

static void
analyze_clone_icoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	analyze_clone_icoming_counts (cs->caller, desc);
	continue;
      }
    else
      {
	if (cs->count.initialized_p ())
	  desc->count += cs->count.ipa ();
	if (!desc->processed_edges->contains (cs)
	    && cs->caller->clone_of == desc->orig)
	  desc->unproc_orig_rec_edges++;
      }
}
/* If caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because it is coming from the "seed" clone
   for the first value and so has been excessively scaled back as if it was
   not a recursive call, adjust it so that the incoming counts of NODE match
   its count.  NODE is the node or its thunk.  */

static void
adjust_clone_incoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	adjust_clone_incoming_counts (cs->caller, desc);
	profile_count sum = profile_count::zero ();
	for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
	  if (e->count.initialized_p ())
	    sum += e->count.ipa ();
	cs->count = cs->count.combine_with_ipa_count (sum);
      }
    else if (!desc->processed_edges->contains (cs)
	     && cs->caller->clone_of == desc->orig)
      {
	cs->count += desc->count;
	if (dump_file)
	  {
	    fprintf (dump_file, "       Adjusted count of an incoming edge of "
		     "a clone %s -> %s to ", cs->caller->dump_name (),
		     cs->callee->dump_name ());
	    cs->count.dump (dump_file);
	    fprintf (dump_file, "\n");
	  }
      }
}
/* When ORIG_NODE has been cloned for values which have been generated for a
   self-recursive call as a result of an arithmetic pass-through
   jump-function, adjust its count together with counts of all such clones in
   SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.

   The function sums the counts of the original node and all its clones that
   cannot be attributed to a specific clone because it comes from a
   non-recursive edge.  This sum is then evenly divided between the clones and
   on top of that each one gets all the counts which can be attributed
   directly to it.  */

static void
update_counts_for_self_gen_clones (cgraph_node *orig_node,
				   const vec<cgraph_node *> &self_gen_clones)
{
  profile_count redist_sum = orig_node->count.ipa ();
  if (!(redist_sum > profile_count::zero ()))
    return;

  if (dump_file)
    fprintf (dump_file, "     Updating profile of self recursive clone "
	     "series\n");

  gather_other_count_struct gocs;
  gocs.orig = orig_node;
  gocs.other_count = profile_count::zero ();

  auto_vec <profile_count, 8> other_edges_count;
  for (cgraph_node *n : self_gen_clones)
    {
      gocs.other_count = profile_count::zero ();
      n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
					     &gocs, false);
      other_edges_count.safe_push (gocs.other_count);
      redist_sum -= gocs.other_count;
    }

  hash_set<cgraph_edge *> processed_edges;
  unsigned i = 0;
  for (cgraph_node *n : self_gen_clones)
    {
      profile_count orig_count = n->count;
      profile_count new_count
	= (redist_sum / self_gen_clones.length () + other_edges_count[i]);
      new_count = lenient_count_portion_handling (new_count, orig_node);
      n->count = new_count;
      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
      for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
	{
	  cs->count = cs->count.apply_scale (new_count, orig_count);
	  processed_edges.add (cs);
	}
      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
	cs->count = cs->count.apply_scale (new_count, orig_count);

      i++;
    }

  /* There are still going to be edges to ORIG_NODE that have one or more
     clones coming from another node clone in SELF_GEN_CLONES and which we
     scaled by the same amount, which means that the total incoming sum of
     counts to ORIG_NODE will be too high, scale such edges back.  */
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      if (cs->callee->ultimate_alias_target () == orig_node)
	{
	  unsigned den = 0;
	  for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	    if (e->callee->ultimate_alias_target () == orig_node
		&& processed_edges.contains (e))
	      den++;
	  if (den > 0)
	    for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	      if (e->callee->ultimate_alias_target () == orig_node
		  && processed_edges.contains (e))
		e->count /= den;
	}
    }

  /* Edges from the seeds of the values generated for arithmetic
     jump-functions along self-recursive edges are likely to have fairly low
     count and so edges from them to nodes in the self_gen_clones do not
     correspond to the artificially distributed count of the nodes, the total
     sum of incoming edges to some clones might be too low.  Detect this
     situation and correct it.  */
  for (cgraph_node *n : self_gen_clones)
    {
      if (!(n->count.ipa () > profile_count::zero ()))
	continue;

      desc_incoming_count_struct desc;
      desc.orig = orig_node;
      desc.processed_edges = &processed_edges;
      desc.count = profile_count::zero ();
      desc.unproc_orig_rec_edges = 0;
      analyze_clone_icoming_counts (n, &desc);

      if (n->count.differs_from_p (desc.count))
	{
	  if (n->count > desc.count
	      && desc.unproc_orig_rec_edges > 0)
	    {
	      desc.count = n->count - desc.count;
	      desc.count /= desc.unproc_orig_rec_edges;
	      adjust_clone_incoming_counts (n, &desc);
	    }
	  else if (dump_file)
	    fprintf (dump_file,
		     "       Unable to fix up incoming counts for %s.\n",
		     n->dump_name ());
	}
    }

  if (dump_file)
    for (cgraph_node *n : self_gen_clones)
      dump_profile_updates (n, n != orig_node);
}
/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  This function should not be
   used for clones generated for arithmetic pass-through jump functions on a
   self-recursive call graph edge, that situation is handled by
   update_counts_for_self_gen_clones.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct caller_statistics stats;
  profile_count new_sum;
  profile_count remainder, orig_node_count = orig_node->count.ipa ();

  if (!(orig_node_count > profile_count::zero ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "     Updating profile from original count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  init_caller_stats (&stats, new_node);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
  new_sum = stats.count_sum;

  bool orig_edges_processed = false;
  if (new_sum > orig_node_count)
    {
      /* TODO: Profile has already gone astray, keep what we have but lower it
	 to global0 category.  */
      remainder = orig_node->count.global0 ();

      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
	cs->count = cs->count.global0 ();
      for (cgraph_edge *cs = orig_node->indirect_calls;
	   cs;
	   cs = cs->next_callee)
	cs->count = cs->count.global0 ();
      orig_edges_processed = true;
    }
  else if (stats.rec_count_sum.nonzero_p ())
    {
      int new_nonrec_calls = stats.n_nonrec_calls;
      /* There are self-recursive edges which are likely to bring in the
	 majority of calls but which we must divide in between the original
	 and new node.  */
      init_caller_stats (&stats, orig_node);
      orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
						     &stats, false);
      int orig_nonrec_calls = stats.n_nonrec_calls;
      profile_count orig_nonrec_call_count = stats.count_sum;

      if (orig_node->local)
	{
	  if (!orig_nonrec_call_count.nonzero_p ())
	    {
	      if (dump_file)
		fprintf (dump_file, "       The original is local and the only "
			 "incoming edges from non-dead callers with nonzero "
			 "counts are self-recursive, assuming it is cold.\n");
	      /* The NEW_NODE count and counts of all its outgoing edges
		 are still unmodified copies of ORIG_NODE's.  Just clear
		 the latter and bail out.  */
	      profile_count zero;
	      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
		zero = profile_count::zero ().guessed_local ();
	      else
		zero = profile_count::adjusted_zero ();
	      orig_node->count = zero;
	      for (cgraph_edge *cs = orig_node->callees;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;
	      for (cgraph_edge *cs = orig_node->indirect_calls;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;
	      return;
	    }
	}
      else
	{
	  /* Let's behave as if there was another caller that accounts for all
	     the calls that were either indirect or from other compilation
	     units.  */
	  orig_nonrec_calls++;
	  profile_count pretend_caller_count
	    = (orig_node_count - new_sum - orig_nonrec_call_count
	       - stats.rec_count_sum);
	  orig_nonrec_call_count += pretend_caller_count;
	}

      /* Divide all "unexplained" counts roughly proportionally to sums of
	 counts of non-recursive calls.

	 We put rather arbitrary limits on how many counts we claim because
	 the number of non-self-recursive incoming count is only a rough
	 guideline and there are cases (such as mcf) where using it blindly
	 just takes too many.  And if lattices are considered in the opposite
	 order we could also take too few.  */
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
      profile_count new_part
	= MAX (MIN (unexp.apply_scale (new_sum,
				       new_sum + orig_nonrec_call_count),
		    unexp.apply_scale (limit_den - 1, limit_den)),
	       unexp.apply_scale (new_nonrec_calls, limit_den));

      if (dump_file)
	{
	  fprintf (dump_file, "       Claiming ");
	  new_part.dump (dump_file);
	  fprintf (dump_file, " of unexplained ");
	  unexp.dump (dump_file);
	  fprintf (dump_file, " counts because of self-recursive "
		   "calls\n");
	}

      new_sum += new_part;
      remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						  orig_node);
    }
  else
    remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						orig_node);

  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  new_node->count = new_sum;
  orig_node->count = remainder;

  profile_count orig_new_node_count = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);

  if (!orig_edges_processed)
    {
      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
	cs->count = cs->count.apply_scale (remainder, orig_node_count);
      for (cgraph_edge *cs = orig_node->indirect_calls;
	   cs;
	   cs = cs->next_callee)
	cs->count = cs->count.apply_scale (remainder, orig_node_count);
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
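/* A worked example of the "unexplained" count division above, with made-up
   numbers: for orig_node_count 1000, new_sum 300, orig_nonrec_call_count 200,
   orig_nonrec_calls 2 and new_nonrec_calls 2, we get unexp = 500 and
   limit_den = 8, so new_part = MAX (MIN (500 * 300 / 500, 500 * 7 / 8),
   500 * 2 / 8) = MAX (MIN (300, 437), 125) = 300.  The clone thus claims
   counts roughly proportional to the counts of its non-recursive callers,
   clamped here between 500 * 2 / 8 = 125 and 500 * 7 / 8 = 437.  */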
/* Update the respective profile of specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
			    struct cgraph_node *orig_node,
			    profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();

  if (dump_file)
    {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n    old ipa count of the original node is ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count
    = lenient_count_portion_handling (orig_node->count - redirected_sum,
				      orig_node);

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
						 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
static void adjust_references_in_caller (cgraph_edge *cs,
					 symtab_node *symbol, int index);

/* Simple structure to pass a symbol and index (with same meaning as parameters
   of adjust_references_in_caller) through a void* parameter of a
   call_for_symbol_thunks_and_aliases callback.  */
struct symbol_and_index_together
{
  symtab_node *symbol;
  int index;
};
/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
   adjust_references_in_caller on edges up in the call-graph, if necessary.  */

static bool
adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
{
  symbol_and_index_together *pack = (symbol_and_index_together *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      adjust_references_in_caller (cs, pack->symbol, pack->index);
  return false;
}
/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
   variable which is only dereferenced and which is represented by SYMBOL.
   See if we can remove the ADDR reference in callers associated with the
   call.  */

static void
adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
  if (jfunc->type == IPA_JF_CONST)
    {
      ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
						    cs->lto_stmt_uid,
						    IPA_REF_ADDR);
      if (!to_del)
	return;
      to_del->remove_reference ();
      ipa_zap_jf_refdesc (jfunc);
      if (dump_file)
	fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		 cs->caller->dump_name (), symbol->dump_name ());
      return;
    }

  if (jfunc->type != IPA_JF_PASS_THROUGH
      || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
      || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
    return;

  int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
  cgraph_node *caller = cs->caller;
  ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
  /* TODO: This consistency check may be too big and not really
     that useful.  Consider removing it.  */
  tree cst;
  if (caller_info->ipcp_orig_node)
    cst = caller_info->known_csts[fidx];
  else
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
      gcc_assert (lat->is_single_const ());
      cst = lat->values->value;
    }
  gcc_assert (TREE_CODE (cst) == ADDR_EXPR
	      && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
		  == symbol));

  int cuses = ipa_get_controlled_uses (caller_info, fidx);
  if (cuses == IPA_UNDESCRIBED_USE)
    return;
  gcc_assert (cuses > 0);
  cuses--;
  ipa_set_controlled_uses (caller_info, fidx, cuses);
  ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "    Controlled uses of parameter %i of %s dropped "
	     "to %i.\n", fidx, caller->dump_name (), cuses);
  if (cuses)
    return;

  if (caller_info->ipcp_orig_node)
    {
      /* Cloning machinery has created a reference here, we need to either
	 remove it or change it to a read one.  */
      ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
      if (to_del)
	{
	  to_del->remove_reference ();
	  if (dump_file)
	    fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		     cs->caller->dump_name (), symbol->dump_name ());
	  if (ipa_get_param_load_dereferenced (caller_info, fidx))
	    {
	      caller->create_reference (symbol, IPA_REF_LOAD, NULL);
	      if (dump_file)
		fprintf (dump_file,
			 "      ...and replaced it with LOAD one.\n");
	    }
	}
    }

  symbol_and_index_together pack;
  pack.symbol = symbol;
  pack.index = fidx;
  if (caller->can_change_signature)
    caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
						&pack, true);
}
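/* For illustration (hypothetical code, not from GCC sources): given

     static int var;
     static int read_it (int *p) { return *p; }
     int outer (void) { return read_it (&var); }

   once read_it is cloned for p == &var, the IPA_REF_ADDR reference from
   outer to var that only described the argument can be dropped, or, because
   the parameter is load-dereferenced, replaced with an IPA_REF_LOAD one, so
   var no longer appears to have its address taken.  */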
/* Return true if we would like to remove a parameter from NODE when cloning it
   with KNOWN_CSTS scalar constants.  */

static bool
want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
{
  auto_vec<bool, 16> surviving;
  bool filled_vec = false;
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      if (!known_csts[i] && ipa_is_param_used (info, i))
	continue;

      if (!filled_vec)
	{
	  clone_info *info = clone_info::get (node);
	  if (!info || !info->param_adjustments)
	    return true;
	  info->param_adjustments->get_surviving_params (&surviving);
	  filled_vec = true;
	}
      if (surviving.length () > (unsigned) i && surviving[i])
	return true;
    }
  return false;
}
/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 vec<ipa_argagg_value, va_gc> *aggvals,
			 vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
					   ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }
  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
	continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
	{
	  tree base = get_base_address (TREE_OPERAND (t, 0));
	  if (TREE_CODE (base) == VAR_DECL
	      && ipa_get_controlled_uses (info, i) == 0
	      && ipa_get_param_load_dereferenced (info, i)
	      && (ref_symbol = symtab_node::get (base)))
	    {
	      load_ref = true;
	      if (node->can_change_signature)
		for (cgraph_edge *caller : callers)
		  adjust_references_in_caller (caller, ref_symbol, i);
	    }
	}

      ipa_replace_map *replace_map = get_replacement_map (info, t, i,
							  load_ref);
      if (replace_map)
	vec_safe_push (replace_trees, replace_map);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller
			      != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (const ipa_argagg_value &av : aggvals)
    new_node->maybe_create_reference (av.value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n",
	       new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, "     known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	{
	  fprintf (dump_file, "     Aggregate replacements:");
	  ipa_argagg_value_list avs (aggvals);
	  avs.dump (dump_file);
	}
    }

  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
				  aggvals);

  return new_node;
}
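/* To illustrate the overall effect (hypothetical code and clone name): if

     static int scale (int x, int m) { return x * m; }

   is called as scale (v, 8) by every edge in CALLERS, this function creates
   a clone along the lines of scale.constprop.0 in which m is replaced by the
   constant 8 (and possibly dropped from the signature entirely, since the
   parameter becomes unused), and redirects the edges in CALLERS to it.  */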
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
			       bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
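/* For example (hypothetical code), in

     static int count_down (int n, int base)
     {
       if (n <= 0)
	 return base;
       return count_down (n - 1, base);
     }

   the jump function describing the second argument of the recursive call is
   a simple NOP pass-through of parameter base to itself, which the predicate
   above detects (provided count_down is not itself an IPA-CP clone), so any
   value of base known from outside holds for the recursion as well.  */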
/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check if JFUNC is a simple no-operation pass-through.  */

static bool
self_recursive_agg_pass_through_p (const cgraph_edge *cs,
				   const ipa_agg_jf_item *jfunc,
				   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> &known_csts,
					    const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

		fn (int i)
		{
		  int f = fn (i & 1);
		}

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      gcc_assert (newval);
	      t = ipa_get_jf_arith_result (
				ipa_get_jf_pass_through_operation (jump_func),
				newval,
				ipa_get_jf_pass_through_operand (jump_func),
				type);
	    }
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}
/* Push all aggregate values coming along edge CS for parameter number INDEX
   to RES.  If INTERIM is non-NULL, it contains the current interim state of
   collected aggregate values which can be used to compute values passed over
   self-recursive edges.

   This is basically one iteration of push_agg_values_from_edge over one
   parameter, which allows for simpler early returns.  */

static void
push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
				     vec<ipa_argagg_value> *res,
				     const ipa_argagg_value_list *interim)
{
  bool agg_values_from_caller = false;
  bool agg_jf_preserved = false;
  unsigned unit_delta = UINT_MAX;
  int src_idx = -1;
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
						index);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      agg_values_from_caller = true;
      agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
      src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      unit_delta = 0;
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      agg_values_from_caller = true;
      agg_jf_preserved = true;
      src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
    }

  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
  if (agg_values_from_caller)
    {
      if (caller_info->ipcp_orig_node)
	{
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  ipcp_transformation *ts
	    = ipcp_get_transformation_summary (cs->caller);
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  ipcp_param_lattices *orig_plats
	    = ipa_get_parm_lattices (orig_info, src_idx);
	  if (ts
	      && orig_plats->aggs
	      && (agg_jf_preserved || !orig_plats->aggs_by_ref))
	    {
	      ipa_argagg_value_list src (ts);
	      src.push_adjusted_values (src_idx, index, unit_delta, res);
	      return;
	    }
	}
      else
	{
	  ipcp_param_lattices *src_plats
	    = ipa_get_parm_lattices (caller_info, src_idx);
	  if (src_plats->aggs
	      && !src_plats->aggs_bottom
	      && (agg_jf_preserved || !src_plats->aggs_by_ref))
	    {
	      if (interim && self_recursive_pass_through_p (cs, jfunc, index))
		{
		  interim->push_adjusted_values (src_idx, index, unit_delta,
						 res);
		  return;
		}
	      if (!src_plats->aggs_contain_variable)
		{
		  push_agg_values_from_plats (src_plats, index, unit_delta,
					      res);
		  return;
		}
	    }
	}
    }

  if (!jfunc->agg.items)
    return;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
    {
      tree value, srcvalue;
      /* Besides simple pass-through aggregate jump function, arithmetic
	 aggregate jump function could also bring same aggregate value as
	 parameter passed-in for self-feeding recursive call.  For example,

	   fn (int *i)
	   {
	     int j = *i & 1;
	     fn (&j);
	   }

	 Given that *i is 0, recursive propagation via (*i & 1) also gets
	 0.  */
      if (interim
	  && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
	  && (srcvalue = interim->get_value (index,
					     agg_jf.offset / BITS_PER_UNIT)))
	value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
					 srcvalue,
					 agg_jf.value.pass_through.operand,
					 agg_jf.type);
      else
	value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
					  &agg_jf);
      if (value)
	{
	  struct ipa_argagg_value iav;
	  iav.value = value;
	  iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
	  iav.index = index;
	  iav.by_ref = jfunc->agg.by_ref;
	  iav.killed = false;

	  gcc_assert (first
		      || iav.unit_offset > prev_unit_offset);
	  prev_unit_offset = iav.unit_offset;
	  first = false;

	  res->safe_push (iav);
	}
    }
}
/* Push all aggregate values coming along edge CS to RES.  DEST_INFO is the
   description of ultimate callee of CS or the one it was cloned from (the
   summary where lattices are).  If INTERIM is non-NULL, it contains the
   current interim state of collected aggregate values which can be used to
   compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
   is true) and to skip values which clearly will not be part of intersection
   with INTERIM.  */

static void
push_agg_values_from_edge (struct cgraph_edge *cs,
			   ipa_node_params *dest_info,
			   vec<ipa_argagg_value> *res,
			   const ipa_argagg_value_list *interim,
			   bool optimize_self_recursion)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  if (!args)
    return;

  int count = MIN (ipa_get_param_count (dest_info),
		   ipa_get_cs_argument_count (args));

  unsigned interim_index = 0;
  for (int index = 0; index < count; index++)
    {
      if (interim)
	{
	  while (interim_index < interim->m_elts.size ()
		 && interim->m_elts[interim_index].value
		 && interim->m_elts[interim_index].index < index)
	    interim_index++;
	  if (interim_index >= interim->m_elts.size ()
	      || interim->m_elts[interim_index].index > index)
	    continue;
	}

      ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
      if (!ipa_is_param_used (dest_info, index)
	  || plats->aggs_bottom)
	continue;
      push_agg_values_for_index_from_edge (cs, index, res,
					   optimize_self_recursion ? interim
					   : NULL);
    }
}
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  Return nullptr if there are none.  */

static struct vec<ipa_argagg_value, va_gc> *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  if (dest_info->ipcp_orig_node)
    dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);

  /* gather_edges_for_value puts a non-recursive call into the first element
     of callers if it can.  */
  auto_vec<ipa_argagg_value, 32> interim;
  push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);

  unsigned valid_entries = interim.length ();
  if (!valid_entries)
    return NULL;

  unsigned caller_count = callers.length ();
  for (unsigned i = 1; i < caller_count; i++)
    {
      auto_vec<ipa_argagg_value, 32> last;
      ipa_argagg_value_list avs (&interim);
      push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);

      valid_entries = intersect_argaggs_with (interim, last);
      if (!valid_entries)
	return NULL;
    }

  vec<ipa_argagg_value, va_gc> *res = NULL;
  vec_safe_reserve_exact (res, valid_entries);
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res->quick_push (av);
  gcc_checking_assert (res->length () == valid_entries);
  return res;
}
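/* A small worked example of the intersection above (hypothetical code): if
   one caller passes a structure { .a = 1, .b = 2 } and another caller passes
   { .a = 1, .b = 3 } in the same by-reference parameter, only the entry
   describing member a survives intersect_argaggs_with; entries that do not
   survive have their value cleared and are skipped when building the
   returned vector.  */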
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
	continue;

      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}
/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
  if (!ts || vec_safe_is_empty (ts->m_agg_values))
    return true;

  const ipa_argagg_value_list existing (ts->m_agg_values);
  auto_vec<ipa_argagg_value, 32> edge_values;
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  gcc_checking_assert (dest_info->ipcp_orig_node);
  dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
  push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
  const ipa_argagg_value_list avl (&edge_values);
  return avl.superset_of_p (existing);
}
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected to the special node.
   This function can therefore redirect the final edge in a SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
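
/* A scenario in which the above matters (illustrative): for two mutually
   recursive functions in one SCC, the recursive edge between their freshly
   created clones initially still leads to an original node, because when the
   first clone was made the second one did not yet exist.  Once both clones
   exist, a later invocation of this function can finally redirect that last
   edge to the appropriate specialized node.  */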
/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}
/* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context,
   otherwise return vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}
/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

static bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  const ipa_argagg_value_list avl (aggvals);
  tree v = avl.get_value (index, offset / BITS_PER_UNIT);
  return v && values_equal_for_ipcp_p (v, value);
}
/* Return true if offset is minus one because source of a polymorphic context
   cannot be an aggregate value.  */

static bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
			       int, HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}
/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.  SELF_GEN_CLONES
   is a vector which contains clones created for self-recursive calls with an
   arithmetic pass-through jump function.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were, it would only be the same guesswork
	 we can do now.  The recursive part of the counts can be derived from
	 the count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum, val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  vec<ipa_argagg_value, va_gc> *aggvals
    = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);

  update_profiling_info (node, val->spec_node);
  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too. */

  return true;
}
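
/* Worked example for the recursive count estimate above (illustrative
   numbers): if NODE has an IPA profile count of 1000 and four clones for
   self-recursive calls have already been generated, DEM is 5 and
   rec_count_sum becomes 1000 / 5 = 200, i.e. the share of the recursion
   attributed to the prospective clone before it is added to count_sum.  */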
/* Like irange::contains_p(), but convert VAL to the range of R if
   necessary.  */

static bool
ipa_range_contains_p (const vrange &r, tree val)
{
  if (r.undefined_p ())
    return false;

  tree type = r.type ();
  if (!wi::fits_to_tree_p (wi::to_wide (val), type))
    return false;

  val = fold_convert (type, val);
  return r.contains_p (val);
}
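
/* Illustrative example: if R describes values of an 8-bit unsigned type and
   VAL is the integer constant 300, wi::fits_to_tree_p fails and false is
   returned without querying R at all; for VAL = 42, the constant is first
   converted to R's type and r.contains_p makes the decision.  */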
/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      if (!ipa_is_param_used (info, i))
	continue;

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    {
	      /* If some values generated for self-recursive calls with
		 arithmetic jump functions fall outside of the known
		 range for the parameter, we can skip them.  */
	      if (TREE_CODE (val->value) == INTEGER_CST
		  && !plats->m_value_range.bottom_p ()
		  && !ipa_range_contains_p (plats->m_value_range.m_vr,
					    val->value))
		{
		  /* This can happen also if a constant present in the source
		     code falls outside of the range of the parameter's type,
		     so we simply skip such values rather than assert.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " - skipping%s value ",
			       val->self_recursion_generated_p ()
			       ? " self_recursion_generated" : "");
		      print_ipcp_constant_value (dump_file, val->value);
		      fprintf (dump_file, " because it is outside known "
			       "range.\n");
		    }
		  continue;
		}
	      ret |= decide_about_value (node, i, -1, val, &avals,
					 &self_gen_clones);
	    }
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been considered
		   for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val, &avals,
					   &self_gen_clones);
	}

      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      vec<ipa_argagg_value, va_gc> *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
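
/* For illustration (hypothetical source and clone name): given

     static int f (int a, int b) { return a / b; }
     ... f (x, 4) ... f (y, 4) ...

   the lattice of B holds the single constant 4, so the loop above hands it
   to decide_about_value, and a clone such as f.constprop.0 with B replaced
   by 4 may be created, letting later passes fold the division by the known
   constant.  Whether the clone is actually made depends on the cost model.  */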
/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = ipa_node_params_sum->get (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	   (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
	ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
	if (!caller_info /* Unoptimized callers are like dead ones.  */
	    || !caller_info->node_dead)
	  return true;
      }
  return false;
}
/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
	ipa_node_params *info = ipa_node_params_sum->get (v);
	if (info
	    && !v->call_for_symbol_thunks_and_aliases
		  (has_undead_caller_from_outside_scc_p, NULL, true))
	  info->node_dead = 1;
      }

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
	spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	if (ipa_node_params_sum->get (v)
	    && ipa_node_params_sum->get (v)->node_dead)
	  fprintf (dump_file, "  Marking node as dead: %s.\n",
		   v->dump_name ());
    }
}

/* The decision stage.  Iterate over the topological order of call graph nodes
   TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}
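
/* Illustrative scenario for the iteration above: in an SCC { f, g } where f
   calls g and g calls f, versioning f for a known constant can create a
   clone whose outgoing edge now brings a constant to g, so a further round
   of decide_whether_version_node over the same SCC may find a profitable
   clone of g that the previous round could not.  */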
/* Look up all VR and bits information that we have discovered and copy it
   over to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool dumped_sth = false;
      bool found_useful_result = false;
      bool do_vr = true;
      bool do_bits = true;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagation; -fipa-vrp: disabled.\n",
		     node->dump_name ());
	  do_vr = false;
	}
      if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp: disabled.\n",
		     node->dump_name ());
	  do_bits = false;
	}
      if (!do_bits && !do_vr)
	continue;

      if (info->ipcp_orig_node)
	info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	  if (do_bits && plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipcp_bits_lattice *bits = NULL;

	  if (do_bits
	      && plats->bits_lattice.constant_p ()
	      && dbg_cnt (ipa_cp_bits))
	    bits = &plats->bits_lattice;

	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ()
	      && dbg_cnt (ipa_cp_vr))
	    {
	      if (bits)
		{
		  Value_Range tmp = plats->m_value_range.m_vr;
		  tree type = ipa_get_type (info, i);
		  irange &r = as_a<irange> (tmp);
		  irange_bitmask bm (wide_int::from (bits->get_value (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)),
				     wide_int::from (bits->get_mask (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)));
		  r.update_bitmask (bm);
		  ipa_vr vr (tmp);
		  ts->m_vr->quick_push (vr);
		}
	      else
		{
		  ipa_vr vr (plats->m_value_range.m_vr);
		  ts->m_vr->quick_push (vr);
		}
	    }
	  else if (bits)
	    {
	      tree type = ipa_get_type (info, i);
	      Value_Range tmp;
	      tmp.set_varying (type);
	      irange &r = as_a<irange> (tmp);
	      irange_bitmask bm (wide_int::from (bits->get_value (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)),
				 wide_int::from (bits->get_mask (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)));
	      r.update_bitmask (bm);
	      ipa_vr vr (tmp);
	      ts->m_vr->quick_push (vr);
	    }
	  else
	    {
	      ipa_vr vr;
	      ts->m_vr->quick_push (vr);
	    }

	  if (!dump_file || !bits)
	    continue;

	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  print_hex (bits->get_value (), dump_file);
	  fprintf (dump_file, ", mask = ");
	  print_hex (bits->get_mask (), dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of value range and bits propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}
/* Initialization and computation of IPCP data structures.  This is the initial
   intraprocedural analysis of functions, which gathers information to be
   propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}
namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries, /* write_optimization_summary */
		      ipcp_read_transformation_summaries, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
    {
      /* FIXME: We should remove the optimize check after we ensure we never
	 run IPA passes when not optimizing.  */
      return (flag_ipa_cp && optimize) || in_lto_p;
    }

  unsigned int execute (function *) final override { return ipcp_driver (); }

}; // class pass_ipa_cp
} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}
/* Reset all state within ipa-cp.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_cc_finalize (void)
{
  base_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}
/* Given PARAM which must be a parameter of function FNDECL described by THIS,
   return its index in the DECL_ARGUMENTS chain, using a pre-computed
   DECL_UID-sorted vector if available (which is pre-computed only if there are
   many parameters).  Can return -1 if param is a static chain not represented
   among DECL_ARGUMENTS.  */

int
ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
{
  gcc_assert (TREE_CODE (param) == PARM_DECL);
  if (m_uid_to_idx)
    {
      unsigned puid = DECL_UID (param);
      const ipa_uid_to_idx_map_elt *res
	= std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
			    [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
			    {
			      return elt.uid < uid;
			    });
      if (res == m_uid_to_idx->end ()
	  || res->uid != puid)
	{
	  gcc_assert (DECL_STATIC_CHAIN (fndecl));
	  return -1;
	}
      return res->index;
    }

  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    if (p == param)
      return (int) index;

  gcc_assert (DECL_STATIC_CHAIN (fndecl));
  return -1;
}
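
/* Illustrative lookup (hypothetical values): with m_uid_to_idx sorted by uid
   as { {uid 100, index 2}, {uid 105, index 0}, {uid 111, index 1} }, a PARAM
   whose DECL_UID is 105 is found by the binary search above and 0 is
   returned; a uid not present in the vector (possible for the static chain)
   results in -1.  */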
/* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
   according to the uid.  */

static int
compare_uids (const void *a, const void *b)
{
  const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
  const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
  if (e1->uid < e2->uid)
    return -1;
  if (e1->uid > e2->uid)
    return 1;
  return 0;
}
/* Assuming THIS describes FNDECL and it has sufficiently many parameters to
   justify the overhead, create a DECL_UID-sorted vector to speed up mapping
   from parameters to their indices in the DECL_ARGUMENTS chain.  */

void
ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
{
  int c = count_formal_params (fndecl);
  if (c < 32)
    return;

  m_uid_to_idx = NULL;
  vec_safe_reserve (m_uid_to_idx, c, true);
  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    {
      ipa_uid_to_idx_map_elt elt;
      elt.uid = DECL_UID (p);
      elt.index = index;
      m_uid_to_idx->quick_push (elt);
    }
  m_uid_to_idx->qsort (compare_uids);
}