1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
103 #define INCLUDE_ALGORITHM
106 #include "coretypes.h"
109 #include "gimple-expr.h"
112 #include "alloc-pool.h"
113 #include "tree-pass.h"
115 #include "diagnostic.h"
116 #include "fold-const.h"
117 #include "gimple-iterator.h"
118 #include "gimple-fold.h"
119 #include "symbol-summary.h"
120 #include "tree-vrp.h"
121 #include "ipa-prop.h"
122 #include "tree-pretty-print.h"
123 #include "tree-inline.h"
124 #include "ipa-fnsummary.h"
125 #include "ipa-utils.h"
126 #include "tree-ssa-ccp.h"
127 #include "stringpool.h"
130 #include "symtab-clones.h"
131 #include "gimple-range.h"
133 template <typename valtype
> class ipcp_value
;
135 /* Describes a particular source for an IPA-CP value. */
137 template <typename valtype
>
138 struct ipcp_value_source
141 /* Aggregate offset of the source, negative if the source is scalar value of
142 the argument itself. */
143 HOST_WIDE_INT offset
;
144 /* The incoming edge that brought the value. */
146 /* If the jump function that resulted into his value was a pass-through or an
147 ancestor, this is the ipcp_value of the caller from which the described
148 value has been derived. Otherwise it is NULL. */
149 ipcp_value
<valtype
> *val
;
150 /* Next pointer in a linked list of sources of a value. */
151 ipcp_value_source
*next
;
152 /* If the jump function that resulted into his value was a pass-through or an
153 ancestor, this is the index of the parameter of the caller the jump
154 function references. */
158 /* Common ancestor for all ipcp_value instantiations. */
160 class ipcp_value_base
163 /* Time benefit and that specializing the function for this value would bring
164 about in this function alone. */
165 sreal local_time_benefit
;
166 /* Time benefit that specializing the function for this value can bring about
168 sreal prop_time_benefit
;
169 /* Size cost that specializing the function for this value would bring about
170 in this function alone. */
172 /* Size cost that specializing the function for this value can bring about in
177 : local_time_benefit (0), prop_time_benefit (0),
178 local_size_cost (0), prop_size_cost (0) {}
181 /* Describes one particular value stored in struct ipcp_lattice. */
183 template <typename valtype
>
184 class ipcp_value
: public ipcp_value_base
187 /* The actual value for the given parameter. */
189 /* The list of sources from which this value originates. */
190 ipcp_value_source
<valtype
> *sources
= nullptr;
191 /* Next pointers in a linked list of all values in a lattice. */
192 ipcp_value
*next
= nullptr;
193 /* Next pointers in a linked list of values in a strongly connected component
195 ipcp_value
*scc_next
= nullptr;
196 /* Next pointers in a linked list of SCCs of values sorted topologically
197 according their sources. */
198 ipcp_value
*topo_next
= nullptr;
199 /* A specialized node created for this value, NULL if none has been (so far)
201 cgraph_node
*spec_node
= nullptr;
202 /* Depth first search number and low link for topological sorting of
206 /* SCC number to identify values which recursively feed into each other.
207 Values in the same SCC have the same SCC number. */
209 /* Non zero if the value is generated from another value in the same lattice
210 for a self-recursive call, the actual number is how many times the
211 operation has been performed. In the unlikely event of the value being
212 present in two chains fo self-recursive value generation chains, it is the
214 unsigned self_recursion_generated_level
= 0;
215 /* True if this value is currently on the topo-sort stack. */
216 bool on_stack
= false;
218 void add_source (cgraph_edge
*cs
, ipcp_value
*src_val
, int src_idx
,
219 HOST_WIDE_INT offset
);
221 /* Return true if both THIS value and O feed into each other. */
223 bool same_scc (const ipcp_value
<valtype
> *o
)
225 return o
->scc_no
== scc_no
;
228 /* Return true, if a this value has been generated for a self-recursive call as
229 a result of an arithmetic pass-through jump-function acting on a value in
230 the same lattice function. */
232 bool self_recursion_generated_p ()
234 return self_recursion_generated_level
> 0;
238 /* Lattice describing potential values of a formal parameter of a function, or
239 a part of an aggregate. TOP is represented by a lattice with zero values
240 and with contains_variable and bottom flags cleared. BOTTOM is represented
241 by a lattice with the bottom flag set. In that case, values and
242 contains_variable flag should be disregarded. */
244 template <typename valtype
>
248 /* The list of known values and types in this lattice. Note that values are
249 not deallocated if a lattice is set to bottom because there may be value
250 sources referencing them. */
251 ipcp_value
<valtype
> *values
;
252 /* Number of known values and types in this lattice. */
254 /* The lattice contains a variable component (in addition to values). */
255 bool contains_variable
;
256 /* The value of the lattice is bottom (i.e. variable and unusable for any
260 inline bool is_single_const ();
261 inline bool set_to_bottom ();
262 inline bool set_contains_variable ();
263 bool add_value (valtype newval
, cgraph_edge
*cs
,
264 ipcp_value
<valtype
> *src_val
= NULL
,
265 int src_idx
= 0, HOST_WIDE_INT offset
= -1,
266 ipcp_value
<valtype
> **val_p
= NULL
,
267 unsigned same_lat_gen_level
= 0);
268 void print (FILE * f
, bool dump_sources
, bool dump_benefits
);
271 /* Lattice of tree values with an offset to describe a part of an
274 struct ipcp_agg_lattice
: public ipcp_lattice
<tree
>
277 /* Offset that is being described by this lattice. */
278 HOST_WIDE_INT offset
;
279 /* Size so that we don't have to re-compute it every time we traverse the
280 list. Must correspond to TYPE_SIZE of all lat values. */
282 /* Next element of the linked list. */
283 struct ipcp_agg_lattice
*next
;
286 /* Lattice of known bits, only capable of holding one value.
287 Bitwise constant propagation propagates which bits of a
303 In the above case, the param 'x' will always have all
304 the bits (except the bits in lsb) set to 0.
305 Hence the mask of 'x' would be 0xff. The mask
306 reflects that the bits in lsb are unknown.
307 The actual propagated value is given by m_value & ~m_mask. */
309 class ipcp_bits_lattice
312 bool bottom_p () const { return m_lattice_val
== IPA_BITS_VARYING
; }
313 bool top_p () const { return m_lattice_val
== IPA_BITS_UNDEFINED
; }
314 bool constant_p () const { return m_lattice_val
== IPA_BITS_CONSTANT
; }
315 bool set_to_bottom ();
316 bool set_to_constant (widest_int
, widest_int
);
317 bool known_nonzero_p () const;
319 widest_int
get_value () const { return m_value
; }
320 widest_int
get_mask () const { return m_mask
; }
322 bool meet_with (ipcp_bits_lattice
& other
, unsigned, signop
,
323 enum tree_code
, tree
, bool);
325 bool meet_with (widest_int
, widest_int
, unsigned);
330 enum { IPA_BITS_UNDEFINED
, IPA_BITS_CONSTANT
, IPA_BITS_VARYING
} m_lattice_val
;
332 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
333 If a bit in mask is set to 0, then the corresponding bit in
334 value is known to be constant. */
335 widest_int m_value
, m_mask
;
337 bool meet_with_1 (widest_int
, widest_int
, unsigned, bool);
338 void get_value_and_mask (tree
, widest_int
*, widest_int
*);
341 /* Lattice of value ranges. */
343 class ipcp_vr_lattice
348 inline bool bottom_p () const;
349 inline bool top_p () const;
350 inline bool set_to_bottom ();
351 bool meet_with (const value_range
*p_vr
);
352 bool meet_with (const ipcp_vr_lattice
&other
);
353 void init () { gcc_assert (m_vr
.undefined_p ()); }
354 void print (FILE * f
);
357 bool meet_with_1 (const value_range
*other_vr
);
360 /* Structure containing lattices for a parameter itself and for pieces of
361 aggregates that are passed in the parameter or by a reference in a parameter
362 plus some other useful flags. */
364 class ipcp_param_lattices
367 /* Lattice describing the value of the parameter itself. */
368 ipcp_lattice
<tree
> itself
;
369 /* Lattice describing the polymorphic contexts of a parameter. */
370 ipcp_lattice
<ipa_polymorphic_call_context
> ctxlat
;
371 /* Lattices describing aggregate parts. */
372 ipcp_agg_lattice
*aggs
;
373 /* Lattice describing known bits. */
374 ipcp_bits_lattice bits_lattice
;
375 /* Lattice describing value range. */
376 ipcp_vr_lattice m_value_range
;
377 /* Number of aggregate lattices */
379 /* True if aggregate data were passed by reference (as opposed to by
382 /* All aggregate lattices contain a variable component (in addition to
384 bool aggs_contain_variable
;
385 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
386 for any propagation). */
389 /* There is a virtual call based on this parameter. */
393 /* Allocation pools for values and their sources in ipa-cp. */
395 object_allocator
<ipcp_value
<tree
> > ipcp_cst_values_pool
396 ("IPA-CP constant values");
398 object_allocator
<ipcp_value
<ipa_polymorphic_call_context
> >
399 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
401 object_allocator
<ipcp_value_source
<tree
> > ipcp_sources_pool
402 ("IPA-CP value sources");
404 object_allocator
<ipcp_agg_lattice
> ipcp_agg_lattice_pool
405 ("IPA_CP aggregate lattices");
407 /* Base count to use in heuristics when using profile feedback. */
409 static profile_count base_count
;
411 /* Original overall size of the program. */
413 static long overall_size
, orig_overall_size
;
415 /* Node name to unique clone suffix number map. */
416 static hash_map
<const char *, unsigned> *clone_num_suffixes
;
418 /* Return the param lattices structure corresponding to the Ith formal
419 parameter of the function described by INFO. */
420 static inline class ipcp_param_lattices
*
421 ipa_get_parm_lattices (class ipa_node_params
*info
, int i
)
423 gcc_assert (i
>= 0 && i
< ipa_get_param_count (info
));
424 gcc_checking_assert (!info
->ipcp_orig_node
);
425 gcc_checking_assert (info
->lattices
);
426 return &(info
->lattices
[i
]);
429 /* Return the lattice corresponding to the scalar value of the Ith formal
430 parameter of the function described by INFO. */
431 static inline ipcp_lattice
<tree
> *
432 ipa_get_scalar_lat (class ipa_node_params
*info
, int i
)
434 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
435 return &plats
->itself
;
438 /* Return the lattice corresponding to the scalar value of the Ith formal
439 parameter of the function described by INFO. */
440 static inline ipcp_lattice
<ipa_polymorphic_call_context
> *
441 ipa_get_poly_ctx_lat (class ipa_node_params
*info
, int i
)
443 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
444 return &plats
->ctxlat
;
447 /* Return whether LAT is a lattice with a single constant and without an
450 template <typename valtype
>
452 ipcp_lattice
<valtype
>::is_single_const ()
454 if (bottom
|| contains_variable
|| values_count
!= 1)
460 /* Return true iff X and Y should be considered equal values by IPA-CP. */
463 values_equal_for_ipcp_p (tree x
, tree y
)
465 gcc_checking_assert (x
!= NULL_TREE
&& y
!= NULL_TREE
);
470 if (TREE_CODE (x
) == ADDR_EXPR
471 && TREE_CODE (y
) == ADDR_EXPR
472 && TREE_CODE (TREE_OPERAND (x
, 0)) == CONST_DECL
473 && TREE_CODE (TREE_OPERAND (y
, 0)) == CONST_DECL
)
474 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x
, 0)),
475 DECL_INITIAL (TREE_OPERAND (y
, 0)), 0);
477 return operand_equal_p (x
, y
, 0);
480 /* Print V which is extracted from a value in a lattice to F. */
483 print_ipcp_constant_value (FILE * f
, tree v
)
485 if (TREE_CODE (v
) == ADDR_EXPR
486 && TREE_CODE (TREE_OPERAND (v
, 0)) == CONST_DECL
)
489 print_generic_expr (f
, DECL_INITIAL (TREE_OPERAND (v
, 0)));
492 print_generic_expr (f
, v
);
495 /* Print V which is extracted from a value in a lattice to F. */
498 print_ipcp_constant_value (FILE * f
, ipa_polymorphic_call_context v
)
503 /* Print a lattice LAT to F. */
505 template <typename valtype
>
507 ipcp_lattice
<valtype
>::print (FILE * f
, bool dump_sources
, bool dump_benefits
)
509 ipcp_value
<valtype
> *val
;
514 fprintf (f
, "BOTTOM\n");
518 if (!values_count
&& !contains_variable
)
520 fprintf (f
, "TOP\n");
524 if (contains_variable
)
526 fprintf (f
, "VARIABLE");
532 for (val
= values
; val
; val
= val
->next
)
534 if (dump_benefits
&& prev
)
536 else if (!dump_benefits
&& prev
)
541 print_ipcp_constant_value (f
, val
->value
);
545 ipcp_value_source
<valtype
> *s
;
547 if (val
->self_recursion_generated_p ())
548 fprintf (f
, " [self_gen(%i), from:",
549 val
->self_recursion_generated_level
);
551 fprintf (f
, " [scc: %i, from:", val
->scc_no
);
552 for (s
= val
->sources
; s
; s
= s
->next
)
553 fprintf (f
, " %i(%f)", s
->cs
->caller
->order
,
554 s
->cs
->sreal_frequency ().to_double ());
559 fprintf (f
, " [loc_time: %g, loc_size: %i, "
560 "prop_time: %g, prop_size: %i]\n",
561 val
->local_time_benefit
.to_double (), val
->local_size_cost
,
562 val
->prop_time_benefit
.to_double (), val
->prop_size_cost
);
569 ipcp_bits_lattice::print (FILE *f
)
572 fprintf (f
, " Bits unknown (TOP)\n");
573 else if (bottom_p ())
574 fprintf (f
, " Bits unusable (BOTTOM)\n");
577 fprintf (f
, " Bits: value = "); print_hex (get_value (), f
);
578 fprintf (f
, ", mask = "); print_hex (get_mask (), f
);
583 /* Print value range lattice to F. */
586 ipcp_vr_lattice::print (FILE * f
)
588 dump_value_range (f
, &m_vr
);
591 /* Print all ipcp_lattices of all functions to F. */
594 print_all_lattices (FILE * f
, bool dump_sources
, bool dump_benefits
)
596 struct cgraph_node
*node
;
599 fprintf (f
, "\nLattices:\n");
600 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
602 class ipa_node_params
*info
;
604 info
= ipa_node_params_sum
->get (node
);
605 /* Skip unoptimized functions and constprop clones since we don't make
606 lattices for them. */
607 if (!info
|| info
->ipcp_orig_node
)
609 fprintf (f
, " Node: %s:\n", node
->dump_name ());
610 count
= ipa_get_param_count (info
);
611 for (i
= 0; i
< count
; i
++)
613 struct ipcp_agg_lattice
*aglat
;
614 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
615 fprintf (f
, " param [%d]: ", i
);
616 plats
->itself
.print (f
, dump_sources
, dump_benefits
);
617 fprintf (f
, " ctxs: ");
618 plats
->ctxlat
.print (f
, dump_sources
, dump_benefits
);
619 plats
->bits_lattice
.print (f
);
621 plats
->m_value_range
.print (f
);
623 if (plats
->virt_call
)
624 fprintf (f
, " virt_call flag set\n");
626 if (plats
->aggs_bottom
)
628 fprintf (f
, " AGGS BOTTOM\n");
631 if (plats
->aggs_contain_variable
)
632 fprintf (f
, " AGGS VARIABLE\n");
633 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
635 fprintf (f
, " %soffset " HOST_WIDE_INT_PRINT_DEC
": ",
636 plats
->aggs_by_ref
? "ref " : "", aglat
->offset
);
637 aglat
->print (f
, dump_sources
, dump_benefits
);
643 /* Determine whether it is at all technically possible to create clones of NODE
644 and store this information in the ipa_node_params structure associated
648 determine_versionability (struct cgraph_node
*node
,
649 class ipa_node_params
*info
)
651 const char *reason
= NULL
;
653 /* There are a number of generic reasons functions cannot be versioned. We
654 also cannot remove parameters if there are type attributes such as fnspec
656 if (node
->alias
|| node
->thunk
)
657 reason
= "alias or thunk";
658 else if (!node
->versionable
)
659 reason
= "not a tree_versionable_function";
660 else if (node
->get_availability () <= AVAIL_INTERPOSABLE
)
661 reason
= "insufficient body availability";
662 else if (!opt_for_fn (node
->decl
, optimize
)
663 || !opt_for_fn (node
->decl
, flag_ipa_cp
))
664 reason
= "non-optimized function";
665 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node
->decl
)))
667 /* Ideally we should clone the SIMD clones themselves and create
668 vector copies of them, so IPA-cp and SIMD clones can happily
669 coexist, but that may not be worth the effort. */
670 reason
= "function has SIMD clones";
672 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node
->decl
)))
674 /* Ideally we should clone the target clones themselves and create
675 copies of them, so IPA-cp and target clones can happily
676 coexist, but that may not be worth the effort. */
677 reason
= "function target_clones attribute";
679 /* Don't clone decls local to a comdat group; it breaks and for C++
680 decloned constructors, inlining is always better anyway. */
681 else if (node
->comdat_local_p ())
682 reason
= "comdat-local function";
683 else if (node
->calls_comdat_local
)
685 /* TODO: call is versionable if we make sure that all
686 callers are inside of a comdat group. */
687 reason
= "calls comdat-local function";
690 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
691 work only when inlined. Cloning them may still lead to better code
692 because ipa-cp will not give up on cloning further. If the function is
693 external this however leads to wrong code because we may end up producing
694 offline copy of the function. */
695 if (DECL_EXTERNAL (node
->decl
))
696 for (cgraph_edge
*edge
= node
->callees
; !reason
&& edge
;
697 edge
= edge
->next_callee
)
698 if (fndecl_built_in_p (edge
->callee
->decl
, BUILT_IN_NORMAL
))
700 if (DECL_FUNCTION_CODE (edge
->callee
->decl
) == BUILT_IN_VA_ARG_PACK
)
701 reason
= "external function which calls va_arg_pack";
702 if (DECL_FUNCTION_CODE (edge
->callee
->decl
)
703 == BUILT_IN_VA_ARG_PACK_LEN
)
704 reason
= "external function which calls va_arg_pack_len";
707 if (reason
&& dump_file
&& !node
->alias
&& !node
->thunk
)
708 fprintf (dump_file
, "Function %s is not versionable, reason: %s.\n",
709 node
->dump_name (), reason
);
711 info
->versionable
= (reason
== NULL
);
714 /* Return true if it is at all technically possible to create clones of a
718 ipcp_versionable_function_p (struct cgraph_node
*node
)
720 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
721 return info
&& info
->versionable
;
724 /* Structure holding accumulated information about callers of a node. */
726 struct caller_statistics
728 /* If requested (see below), self-recursive call counts are summed into this
730 profile_count rec_count_sum
;
731 /* The sum of all ipa counts of all the other (non-recursive) calls. */
732 profile_count count_sum
;
733 /* Sum of all frequencies for all calls. */
735 /* Number of calls and hot calls respectively. */
736 int n_calls
, n_hot_calls
;
737 /* If itself is set up, also count the number of non-self-recursive
740 /* If non-NULL, this is the node itself and calls from it should have their
741 counts included in rec_count_sum and not count_sum. */
745 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
746 from IGNORED_CALLER are not counted. */
749 init_caller_stats (caller_statistics
*stats
, cgraph_node
*itself
= NULL
)
751 stats
->rec_count_sum
= profile_count::zero ();
752 stats
->count_sum
= profile_count::zero ();
754 stats
->n_hot_calls
= 0;
755 stats
->n_nonrec_calls
= 0;
757 stats
->itself
= itself
;
760 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
761 non-thunk incoming edges to NODE. */
764 gather_caller_stats (struct cgraph_node
*node
, void *data
)
766 struct caller_statistics
*stats
= (struct caller_statistics
*) data
;
767 struct cgraph_edge
*cs
;
769 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
770 if (!cs
->caller
->thunk
)
772 ipa_node_params
*info
= ipa_node_params_sum
->get (cs
->caller
);
773 if (info
&& info
->node_dead
)
776 if (cs
->count
.ipa ().initialized_p ())
778 if (stats
->itself
&& stats
->itself
== cs
->caller
)
779 stats
->rec_count_sum
+= cs
->count
.ipa ();
781 stats
->count_sum
+= cs
->count
.ipa ();
783 stats
->freq_sum
+= cs
->sreal_frequency ();
785 if (stats
->itself
&& stats
->itself
!= cs
->caller
)
786 stats
->n_nonrec_calls
++;
788 if (cs
->maybe_hot_p ())
789 stats
->n_hot_calls
++;
795 /* Return true if this NODE is viable candidate for cloning. */
798 ipcp_cloning_candidate_p (struct cgraph_node
*node
)
800 struct caller_statistics stats
;
802 gcc_checking_assert (node
->has_gimple_body_p ());
804 if (!opt_for_fn (node
->decl
, flag_ipa_cp_clone
))
807 fprintf (dump_file
, "Not considering %s for cloning; "
808 "-fipa-cp-clone disabled.\n",
813 if (node
->optimize_for_size_p ())
816 fprintf (dump_file
, "Not considering %s for cloning; "
817 "optimizing it for size.\n",
822 init_caller_stats (&stats
);
823 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
, false);
825 if (ipa_size_summaries
->get (node
)->self_size
< stats
.n_calls
)
828 fprintf (dump_file
, "Considering %s for cloning; code might shrink.\n",
833 /* When profile is available and function is hot, propagate into it even if
834 calls seems cold; constant propagation can improve function's speed
836 if (stats
.count_sum
> profile_count::zero ()
837 && node
->count
.ipa ().initialized_p ())
839 if (stats
.count_sum
> node
->count
.ipa ().apply_scale (90, 100))
842 fprintf (dump_file
, "Considering %s for cloning; "
843 "usually called directly.\n",
848 if (!stats
.n_hot_calls
)
851 fprintf (dump_file
, "Not considering %s for cloning; no hot calls.\n",
856 fprintf (dump_file
, "Considering %s for cloning.\n",
861 template <typename valtype
>
862 class value_topo_info
865 /* Head of the linked list of topologically sorted values. */
866 ipcp_value
<valtype
> *values_topo
;
867 /* Stack for creating SCCs, represented by a linked list too. */
868 ipcp_value
<valtype
> *stack
;
869 /* Counter driving the algorithm in add_val_to_toposort. */
872 value_topo_info () : values_topo (NULL
), stack (NULL
), dfs_counter (0)
874 void add_val (ipcp_value
<valtype
> *cur_val
);
875 void propagate_effects ();
878 /* Arrays representing a topological ordering of call graph nodes and a stack
879 of nodes used during constant propagation and also data required to perform
880 topological sort of values and propagation of benefits in the determined
886 /* Array with obtained topological order of cgraph nodes. */
887 struct cgraph_node
**order
;
888 /* Stack of cgraph nodes used during propagation within SCC until all values
889 in the SCC stabilize. */
890 struct cgraph_node
**stack
;
891 int nnodes
, stack_top
;
893 value_topo_info
<tree
> constants
;
894 value_topo_info
<ipa_polymorphic_call_context
> contexts
;
896 ipa_topo_info () : order(NULL
), stack(NULL
), nnodes(0), stack_top(0),
901 /* Skip edges from and to nodes without ipa_cp enabled.
902 Ignore not available symbols. */
905 ignore_edge_p (cgraph_edge
*e
)
907 enum availability avail
;
908 cgraph_node
*ultimate_target
909 = e
->callee
->function_or_virtual_thunk_symbol (&avail
, e
->caller
);
911 return (avail
<= AVAIL_INTERPOSABLE
912 || !opt_for_fn (ultimate_target
->decl
, optimize
)
913 || !opt_for_fn (ultimate_target
->decl
, flag_ipa_cp
));
916 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
919 build_toporder_info (class ipa_topo_info
*topo
)
921 topo
->order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
922 topo
->stack
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
924 gcc_checking_assert (topo
->stack_top
== 0);
925 topo
->nnodes
= ipa_reduced_postorder (topo
->order
, true,
929 /* Free information about strongly connected components and the arrays in
933 free_toporder_info (class ipa_topo_info
*topo
)
935 ipa_free_postorder_info ();
940 /* Add NODE to the stack in TOPO, unless it is already there. */
943 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
945 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
946 if (info
->node_enqueued
)
948 info
->node_enqueued
= 1;
949 topo
->stack
[topo
->stack_top
++] = node
;
952 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
955 static struct cgraph_node
*
956 pop_node_from_stack (class ipa_topo_info
*topo
)
960 struct cgraph_node
*node
;
962 node
= topo
->stack
[topo
->stack_top
];
963 ipa_node_params_sum
->get (node
)->node_enqueued
= 0;
970 /* Set lattice LAT to bottom and return true if it previously was not set as
973 template <typename valtype
>
975 ipcp_lattice
<valtype
>::set_to_bottom ()
982 /* Mark lattice as containing an unknown value and return true if it previously
983 was not marked as such. */
985 template <typename valtype
>
987 ipcp_lattice
<valtype
>::set_contains_variable ()
989 bool ret
= !contains_variable
;
990 contains_variable
= true;
994 /* Set all aggregate lattices in PLATS to bottom and return true if they were
995 not previously set as such. */
998 set_agg_lats_to_bottom (class ipcp_param_lattices
*plats
)
1000 bool ret
= !plats
->aggs_bottom
;
1001 plats
->aggs_bottom
= true;
1005 /* Mark all aggregate lattices in PLATS as containing an unknown value and
1006 return true if they were not previously marked as such. */
1009 set_agg_lats_contain_variable (class ipcp_param_lattices
*plats
)
1011 bool ret
= !plats
->aggs_contain_variable
;
1012 plats
->aggs_contain_variable
= true;
1017 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice
&other
)
1019 return meet_with_1 (&other
.m_vr
);
1022 /* Meet the current value of the lattice with value range described by VR
1026 ipcp_vr_lattice::meet_with (const value_range
*p_vr
)
1028 return meet_with_1 (p_vr
);
1031 /* Meet the current value of the lattice with value range described by
1032 OTHER_VR lattice. Return TRUE if anything changed. */
1035 ipcp_vr_lattice::meet_with_1 (const value_range
*other_vr
)
1040 if (other_vr
->varying_p ())
1041 return set_to_bottom ();
1046 value_range
save (m_vr
);
1047 res
= m_vr
.union_ (*other_vr
);
1048 gcc_assert (res
== (m_vr
!= save
));
1051 res
= m_vr
.union_ (*other_vr
);
1055 /* Return true if value range information in the lattice is yet unknown. */
1058 ipcp_vr_lattice::top_p () const
1060 return m_vr
.undefined_p ();
1063 /* Return true if value range information in the lattice is known to be
1067 ipcp_vr_lattice::bottom_p () const
1069 return m_vr
.varying_p ();
1072 /* Set value range information in the lattice to bottom. Return true if it
1073 previously was in a different state. */
1076 ipcp_vr_lattice::set_to_bottom ()
1078 if (m_vr
.varying_p ())
1080 /* ?? We create all sorts of VARYING ranges for floats, structures,
1081 and other types which we cannot handle as ranges. We should
1082 probably avoid handling them throughout the pass, but it's easier
1083 to create a sensible VARYING here and let the lattice
1085 m_vr
.set_varying (integer_type_node
);
1089 /* Set lattice value to bottom, if it already isn't the case. */
1092 ipcp_bits_lattice::set_to_bottom ()
1096 m_lattice_val
= IPA_BITS_VARYING
;
1102 /* Set to constant if it isn't already. Only meant to be called
1103 when switching state from TOP. */
1106 ipcp_bits_lattice::set_to_constant (widest_int value
, widest_int mask
)
1108 gcc_assert (top_p ());
1109 m_lattice_val
= IPA_BITS_CONSTANT
;
1110 m_value
= wi::bit_and (wi::bit_not (mask
), value
);
1115 /* Return true if any of the known bits are non-zero. */
1118 ipcp_bits_lattice::known_nonzero_p () const
1122 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask
), m_value
), 0);
1125 /* Convert operand to value, mask form. */
1128 ipcp_bits_lattice::get_value_and_mask (tree operand
, widest_int
*valuep
, widest_int
*maskp
)
1130 wide_int
get_nonzero_bits (const_tree
);
1132 if (TREE_CODE (operand
) == INTEGER_CST
)
1134 *valuep
= wi::to_widest (operand
);
1144 /* Meet operation, similar to ccp_lattice_meet, we xor values
1145 if this->value, value have different values at same bit positions, we want
1146 to drop that bit to varying. Return true if mask is changed.
1147 This function assumes that the lattice value is in CONSTANT state. If
1148 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1151 ipcp_bits_lattice::meet_with_1 (widest_int value
, widest_int mask
,
1152 unsigned precision
, bool drop_all_ones
)
1154 gcc_assert (constant_p ());
1156 widest_int old_mask
= m_mask
;
1157 m_mask
= (m_mask
| mask
) | (m_value
^ value
);
1162 if (wi::sext (m_mask
, precision
) == -1)
1163 return set_to_bottom ();
1165 return m_mask
!= old_mask
;
1168 /* Meet the bits lattice with operand
1169 described by <value, mask, sgn, precision. */
1172 ipcp_bits_lattice::meet_with (widest_int value
, widest_int mask
,
1180 if (wi::sext (mask
, precision
) == -1)
1181 return set_to_bottom ();
1182 return set_to_constant (value
, mask
);
1185 return meet_with_1 (value
, mask
, precision
, false);
1188 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1189 if code is binary operation or bit_value_unop (other) if code is unary op.
1190 In the case when code is nop_expr, no adjustment is required. If
1191 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1194 ipcp_bits_lattice::meet_with (ipcp_bits_lattice
& other
, unsigned precision
,
1195 signop sgn
, enum tree_code code
, tree operand
,
1198 if (other
.bottom_p ())
1199 return set_to_bottom ();
1201 if (bottom_p () || other
.top_p ())
1204 widest_int adjusted_value
, adjusted_mask
;
1206 if (TREE_CODE_CLASS (code
) == tcc_binary
)
1208 tree type
= TREE_TYPE (operand
);
1209 widest_int o_value
, o_mask
;
1210 get_value_and_mask (operand
, &o_value
, &o_mask
);
1212 bit_value_binop (code
, sgn
, precision
, &adjusted_value
, &adjusted_mask
,
1213 sgn
, precision
, other
.get_value (), other
.get_mask (),
1214 TYPE_SIGN (type
), TYPE_PRECISION (type
), o_value
, o_mask
);
1216 if (wi::sext (adjusted_mask
, precision
) == -1)
1217 return set_to_bottom ();
1220 else if (TREE_CODE_CLASS (code
) == tcc_unary
)
1222 bit_value_unop (code
, sgn
, precision
, &adjusted_value
,
1223 &adjusted_mask
, sgn
, precision
, other
.get_value (),
1226 if (wi::sext (adjusted_mask
, precision
) == -1)
1227 return set_to_bottom ();
1231 return set_to_bottom ();
1237 adjusted_mask
|= adjusted_value
;
1238 adjusted_value
&= ~adjusted_mask
;
1240 if (wi::sext (adjusted_mask
, precision
) == -1)
1241 return set_to_bottom ();
1242 return set_to_constant (adjusted_value
, adjusted_mask
);
1245 return meet_with_1 (adjusted_value
, adjusted_mask
, precision
,
1249 /* Dump the contents of the list to FILE. */
1252 ipa_argagg_value_list::dump (FILE *f
)
1255 for (const ipa_argagg_value
&av
: m_elts
)
1257 fprintf (f
, "%s %i[%u]=", comma
? "," : "",
1258 av
.index
, av
.unit_offset
);
1259 print_generic_expr (f
, av
.value
);
1261 fprintf (f
, "(by_ref)");
1267 /* Dump the contents of the list to stderr. */
1270 ipa_argagg_value_list::debug ()
1275 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1276 NULL if there is no such constant. */
1278 const ipa_argagg_value
*
1279 ipa_argagg_value_list::get_elt (int index
, unsigned unit_offset
) const
1281 ipa_argagg_value key
;
1283 key
.unit_offset
= unit_offset
;
1284 const ipa_argagg_value
*res
1285 = std::lower_bound (m_elts
.begin (), m_elts
.end (), key
,
1286 [] (const ipa_argagg_value
&elt
,
1287 const ipa_argagg_value
&val
)
1289 if (elt
.index
< val
.index
)
1291 if (elt
.index
> val
.index
)
1293 if (elt
.unit_offset
< val
.unit_offset
)
1298 if (res
== m_elts
.end ()
1299 || res
->index
!= index
1300 || res
->unit_offset
!= unit_offset
)
1303 /* TODO: perhaps remove the check (that the underlying array is indeed
1304 sorted) if it turns out it can be too slow? */
1308 const ipa_argagg_value
*slow_res
= NULL
;
1309 int prev_index
= -1;
1310 unsigned prev_unit_offset
= 0;
1311 for (const ipa_argagg_value
&av
: m_elts
)
1313 gcc_assert (prev_index
< 0
1314 || prev_index
< av
.index
1315 || prev_unit_offset
< av
.unit_offset
);
1316 prev_index
= av
.index
;
1317 prev_unit_offset
= av
.unit_offset
;
1318 if (av
.index
== index
1319 && av
.unit_offset
== unit_offset
)
1322 gcc_assert (res
== slow_res
);
1327 /* Return the first item describing a constant stored for parameter with INDEX,
1328 regardless of offset or reference, or NULL if there is no such constant. */
1330 const ipa_argagg_value
*
1331 ipa_argagg_value_list::get_elt_for_index (int index
) const
1333 const ipa_argagg_value
*res
1334 = std::lower_bound (m_elts
.begin (), m_elts
.end (), index
,
1335 [] (const ipa_argagg_value
&elt
, unsigned idx
)
1337 return elt
.index
< idx
;
1339 if (res
== m_elts
.end ()
1340 || res
->index
!= index
)
1345 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1346 performing any check of whether value is passed by reference, or NULL_TREE
1347 if there is no such constant. */
1350 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
) const
1352 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1353 return av
? av
->value
: NULL_TREE
;
1356 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1357 passed by reference or not according to BY_REF, or NULL_TREE if there is
1358 no such constant. */
1361 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
,
1364 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1365 if (av
&& av
->by_ref
== by_ref
)
1370 /* Return true if all elements present in OTHER are also present in this
1374 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list
&other
) const
1377 for (unsigned i
= 0; i
< other
.m_elts
.size (); i
++)
1379 unsigned other_index
= other
.m_elts
[i
].index
;
1380 unsigned other_offset
= other
.m_elts
[i
].unit_offset
;
1382 while (j
< m_elts
.size ()
1383 && (m_elts
[j
].index
< other_index
1384 || (m_elts
[j
].index
== other_index
1385 && m_elts
[j
].unit_offset
< other_offset
)))
1388 if (j
>= m_elts
.size ()
1389 || m_elts
[j
].index
!= other_index
1390 || m_elts
[j
].unit_offset
!= other_offset
1391 || m_elts
[j
].by_ref
!= other
.m_elts
[i
].by_ref
1393 || !values_equal_for_ipcp_p (m_elts
[j
].value
, other
.m_elts
[i
].value
))
1399 /* Push all items in this list that describe parameter SRC_INDEX into RES as
1400 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1401 offsets but skip those which would end up with a negative offset. */
1404 ipa_argagg_value_list::push_adjusted_values (unsigned src_index
,
1405 unsigned dest_index
,
1406 unsigned unit_delta
,
1407 vec
<ipa_argagg_value
> *res
) const
1409 const ipa_argagg_value
*av
= get_elt_for_index (src_index
);
1412 unsigned prev_unit_offset
= 0;
1414 for (; av
< m_elts
.end (); ++av
)
1416 if (av
->index
> src_index
)
1418 if (av
->index
== src_index
1419 && (av
->unit_offset
>= unit_delta
)
1422 ipa_argagg_value new_av
;
1423 gcc_checking_assert (av
->value
);
1424 new_av
.value
= av
->value
;
1425 new_av
.unit_offset
= av
->unit_offset
- unit_delta
;
1426 new_av
.index
= dest_index
;
1427 new_av
.by_ref
= av
->by_ref
;
1429 /* Quick check that the offsets we push are indeed increasing. */
1431 || new_av
.unit_offset
> prev_unit_offset
);
1432 prev_unit_offset
= new_av
.unit_offset
;
1435 res
->safe_push (new_av
);
1440 /* Push to RES information about single lattices describing aggregate values in
1441 PLATS as those describing parameter DEST_INDEX and the original offset minus
1442 UNIT_DELTA. Return true if any item has been pushed to RES. */
1445 push_agg_values_from_plats (ipcp_param_lattices
*plats
, int dest_index
,
1446 unsigned unit_delta
,
1447 vec
<ipa_argagg_value
> *res
)
1449 if (plats
->aggs_contain_variable
)
1452 bool pushed_sth
= false;
1454 unsigned prev_unit_offset
= 0;
1455 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
1456 if (aglat
->is_single_const ()
1457 && (aglat
->offset
/ BITS_PER_UNIT
- unit_delta
) >= 0)
1459 ipa_argagg_value iav
;
1460 iav
.value
= aglat
->values
->value
;
1461 iav
.unit_offset
= aglat
->offset
/ BITS_PER_UNIT
- unit_delta
;
1462 iav
.index
= dest_index
;
1463 iav
.by_ref
= plats
->aggs_by_ref
;
1466 || iav
.unit_offset
> prev_unit_offset
);
1467 prev_unit_offset
= iav
.unit_offset
;
1471 res
->safe_push (iav
);
1476 /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1477 Return the number of remaining valid entries. */
1480 intersect_argaggs_with (vec
<ipa_argagg_value
> &elts
,
1481 const vec
<ipa_argagg_value
> &other
)
1483 unsigned valid_entries
= 0;
1485 for (unsigned i
= 0; i
< elts
.length (); i
++)
1490 unsigned this_index
= elts
[i
].index
;
1491 unsigned this_offset
= elts
[i
].unit_offset
;
1493 while (j
< other
.length ()
1494 && (other
[j
].index
< this_index
1495 || (other
[j
].index
== this_index
1496 && other
[j
].unit_offset
< this_offset
)))
1499 if (j
>= other
.length ())
1501 elts
[i
].value
= NULL_TREE
;
1505 if (other
[j
].index
== this_index
1506 && other
[j
].unit_offset
== this_offset
1507 && other
[j
].by_ref
== elts
[i
].by_ref
1509 && values_equal_for_ipcp_p (other
[j
].value
, elts
[i
].value
))
1512 elts
[i
].value
= NULL_TREE
;
1514 return valid_entries
;
1517 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1518 return true is any of them has not been marked as such so far. */
1521 set_all_contains_variable (class ipcp_param_lattices
*plats
)
1524 ret
= plats
->itself
.set_contains_variable ();
1525 ret
|= plats
->ctxlat
.set_contains_variable ();
1526 ret
|= set_agg_lats_contain_variable (plats
);
1527 ret
|= plats
->bits_lattice
.set_to_bottom ();
1528 ret
|= plats
->m_value_range
.set_to_bottom ();
1532 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1533 points to by the number of callers to NODE. */
1536 count_callers (cgraph_node
*node
, void *data
)
1538 int *caller_count
= (int *) data
;
1540 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
1541 /* Local thunks can be handled transparently, but if the thunk cannot
1542 be optimized out, count it as a real use. */
1543 if (!cs
->caller
->thunk
|| !cs
->caller
->local
)
1548 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1549 the one caller of some other node. Set the caller's corresponding flag. */
1552 set_single_call_flag (cgraph_node
*node
, void *)
1554 cgraph_edge
*cs
= node
->callers
;
1555 /* Local thunks can be handled transparently, skip them. */
1556 while (cs
&& cs
->caller
->thunk
&& cs
->caller
->local
)
1557 cs
= cs
->next_caller
;
1559 if (ipa_node_params
* info
= ipa_node_params_sum
->get (cs
->caller
))
1561 info
->node_calling_single_call
= true;
1567 /* Initialize ipcp_lattices. */
1570 initialize_node_lattices (struct cgraph_node
*node
)
1572 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1573 struct cgraph_edge
*ie
;
1574 bool disable
= false, variable
= false;
1577 gcc_checking_assert (node
->has_gimple_body_p ());
1579 if (!ipa_get_param_count (info
))
1581 else if (node
->local
)
1583 int caller_count
= 0;
1584 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1586 gcc_checking_assert (caller_count
> 0);
1587 if (caller_count
== 1)
1588 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1593 /* When cloning is allowed, we can assume that externally visible
1594 functions are not called. We will compensate this by cloning
1596 if (ipcp_versionable_function_p (node
)
1597 && ipcp_cloning_candidate_p (node
))
1603 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1604 && !node
->alias
&& !node
->thunk
)
1606 fprintf (dump_file
, "Initializing lattices of %s\n",
1607 node
->dump_name ());
1608 if (disable
|| variable
)
1609 fprintf (dump_file
, " Marking all lattices as %s\n",
1610 disable
? "BOTTOM" : "VARIABLE");
1613 auto_vec
<bool, 16> surviving_params
;
1614 bool pre_modified
= false;
1616 clone_info
*cinfo
= clone_info::get (node
);
1618 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1620 /* At the moment all IPA optimizations should use the number of
1621 parameters of the prevailing decl as the m_always_copy_start.
1622 Handling any other value would complicate the code below, so for the
1623 time bing let's only assert it is so. */
1624 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1625 == ipa_get_param_count (info
))
1626 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1628 pre_modified
= true;
1629 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1631 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1632 && !node
->alias
&& !node
->thunk
)
1635 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1637 if (j
< (int) surviving_params
.length ()
1638 && surviving_params
[j
])
1643 " The following parameters are dead on arrival:");
1646 fprintf (dump_file
, " %u", j
);
1649 fprintf (dump_file
, "\n");
1653 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1655 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1657 || !ipa_get_type (info
, i
)
1658 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1659 || !surviving_params
[i
])))
1661 plats
->itself
.set_to_bottom ();
1662 plats
->ctxlat
.set_to_bottom ();
1663 set_agg_lats_to_bottom (plats
);
1664 plats
->bits_lattice
.set_to_bottom ();
1665 plats
->m_value_range
.m_vr
= value_range ();
1666 plats
->m_value_range
.set_to_bottom ();
1670 plats
->m_value_range
.init ();
1672 set_all_contains_variable (plats
);
1676 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1677 if (ie
->indirect_info
->polymorphic
1678 && ie
->indirect_info
->param_index
>= 0)
1680 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1681 ipa_get_parm_lattices (info
,
1682 ie
->indirect_info
->param_index
)->virt_call
= 1;
1686 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1690 ipacp_value_safe_for_type (tree param_type
, tree value
)
1692 tree val_type
= TREE_TYPE (value
);
1693 if (param_type
== val_type
1694 || useless_type_conversion_p (param_type
, val_type
)
1695 || fold_convertible_p (param_type
, value
))
1701 /* Return the result of a (possibly arithmetic) operation on the constant
1702 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1703 the type of the parameter to which the result is passed. Return
1704 NULL_TREE if that cannot be determined or be considered an
1705 interprocedural invariant. */
1708 ipa_get_jf_arith_result (enum tree_code opcode
, tree input
, tree operand
,
1713 if (opcode
== NOP_EXPR
)
1715 if (!is_gimple_ip_invariant (input
))
1718 if (opcode
== ASSERT_EXPR
)
1720 if (values_equal_for_ipcp_p (input
, operand
))
1728 if (TREE_CODE_CLASS (opcode
) == tcc_comparison
)
1729 res_type
= boolean_type_node
;
1730 else if (expr_type_first_operand_type_p (opcode
))
1731 res_type
= TREE_TYPE (input
);
1736 if (TREE_CODE_CLASS (opcode
) == tcc_unary
)
1737 res
= fold_unary (opcode
, res_type
, input
);
1739 res
= fold_binary (opcode
, res_type
, input
, operand
);
1741 if (res
&& !is_gimple_ip_invariant (res
))
1747 /* Return the result of a (possibly arithmetic) pass through jump function
1748 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1749 to which the result is passed. Return NULL_TREE if that cannot be
1750 determined or be considered an interprocedural invariant. */
1753 ipa_get_jf_pass_through_result (struct ipa_jump_func
*jfunc
, tree input
,
1756 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc
),
1758 ipa_get_jf_pass_through_operand (jfunc
),
1762 /* Return the result of an ancestor jump function JFUNC on the constant value
1763 INPUT. Return NULL_TREE if that cannot be determined. */
1766 ipa_get_jf_ancestor_result (struct ipa_jump_func
*jfunc
, tree input
)
1768 gcc_checking_assert (TREE_CODE (input
) != TREE_BINFO
);
1769 if (TREE_CODE (input
) == ADDR_EXPR
)
1771 gcc_checking_assert (is_gimple_ip_invariant_address (input
));
1772 poly_int64 off
= ipa_get_jf_ancestor_offset (jfunc
);
1773 if (known_eq (off
, 0))
1775 poly_int64 byte_offset
= exact_div (off
, BITS_PER_UNIT
);
1776 return build1 (ADDR_EXPR
, TREE_TYPE (input
),
1777 fold_build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (input
)), input
,
1778 build_int_cst (ptr_type_node
, byte_offset
)));
1780 else if (ipa_get_jf_ancestor_keep_null (jfunc
)
1787 /* Determine whether JFUNC evaluates to a single known constant value and if
1788 so, return it. Otherwise return NULL. INFO describes the caller node or
1789 the one it is inlined to, so that pass-through jump functions can be
1790 evaluated. PARM_TYPE is the type of the parameter to which the result is
1794 ipa_value_from_jfunc (class ipa_node_params
*info
, struct ipa_jump_func
*jfunc
,
1797 if (jfunc
->type
== IPA_JF_CONST
)
1798 return ipa_get_jf_constant (jfunc
);
1799 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
1800 || jfunc
->type
== IPA_JF_ANCESTOR
)
1805 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1806 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1808 idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1810 if (info
->ipcp_orig_node
)
1811 input
= info
->known_csts
[idx
];
1814 ipcp_lattice
<tree
> *lat
;
1817 || idx
>= ipa_get_param_count (info
))
1819 lat
= ipa_get_scalar_lat (info
, idx
);
1820 if (!lat
->is_single_const ())
1822 input
= lat
->values
->value
;
1828 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1829 return ipa_get_jf_pass_through_result (jfunc
, input
, parm_type
);
1831 return ipa_get_jf_ancestor_result (jfunc
, input
);
1837 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1838 that INFO describes the caller node or the one it is inlined to, CS is the
1839 call graph edge corresponding to JFUNC and CSIDX index of the described
1842 ipa_polymorphic_call_context
1843 ipa_context_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
, int csidx
,
1844 ipa_jump_func
*jfunc
)
1846 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
1847 ipa_polymorphic_call_context ctx
;
1848 ipa_polymorphic_call_context
*edge_ctx
1849 = cs
? ipa_get_ith_polymorhic_call_context (args
, csidx
) : NULL
;
1851 if (edge_ctx
&& !edge_ctx
->useless_p ())
1854 if (jfunc
->type
== IPA_JF_PASS_THROUGH
1855 || jfunc
->type
== IPA_JF_ANCESTOR
)
1857 ipa_polymorphic_call_context srcctx
;
1859 bool type_preserved
= true;
1860 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1862 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
1864 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
1865 srcidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1869 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
1870 srcidx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1872 if (info
->ipcp_orig_node
)
1874 if (info
->known_contexts
.exists ())
1875 srcctx
= info
->known_contexts
[srcidx
];
1880 || srcidx
>= ipa_get_param_count (info
))
1882 ipcp_lattice
<ipa_polymorphic_call_context
> *lat
;
1883 lat
= ipa_get_poly_ctx_lat (info
, srcidx
);
1884 if (!lat
->is_single_const ())
1886 srcctx
= lat
->values
->value
;
1888 if (srcctx
.useless_p ())
1890 if (jfunc
->type
== IPA_JF_ANCESTOR
)
1891 srcctx
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
1892 if (!type_preserved
)
1893 srcctx
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
1894 srcctx
.combine_with (ctx
);
1901 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1902 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1903 the result is a range or an anti-range. */
1906 ipa_vr_operation_and_type_effects (value_range
*dst_vr
,
1907 value_range
*src_vr
,
1908 enum tree_code operation
,
1909 tree dst_type
, tree src_type
)
1911 if (!irange::supports_p (dst_type
) || !irange::supports_p (src_type
))
1914 range_op_handler
handler (operation
, dst_type
);
1916 && handler
.fold_range (*dst_vr
, dst_type
,
1917 *src_vr
, value_range (dst_type
))
1918 && !dst_vr
->varying_p ()
1919 && !dst_vr
->undefined_p ());
1922 /* Determine range of JFUNC given that INFO describes the caller node or
1923 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1924 and PARM_TYPE of the parameter. */
1927 ipa_value_range_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
,
1928 ipa_jump_func
*jfunc
, tree parm_type
)
1932 ipa_vr_operation_and_type_effects (&vr
,
1934 NOP_EXPR
, parm_type
,
1935 jfunc
->m_vr
->type ());
1936 if (vr
.singleton_p ())
1938 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1941 ipcp_transformation
*sum
1942 = ipcp_get_transformation_summary (cs
->caller
->inlined_to
1943 ? cs
->caller
->inlined_to
1945 if (!sum
|| !sum
->m_vr
)
1948 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1950 if (!(*sum
->m_vr
)[idx
].known_p ())
1952 tree vr_type
= ipa_get_type (info
, idx
);
1954 (*sum
->m_vr
)[idx
].get_vrange (srcvr
);
1956 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
1958 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
1962 if (ipa_vr_operation_and_type_effects (&res
,
1964 operation
, parm_type
,
1970 value_range op_res
, res
;
1971 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
1973 range_op_handler
handler (operation
, vr_type
);
1975 ipa_range_set_and_normalize (op_vr
, op
);
1978 || !op_res
.supports_type_p (vr_type
)
1979 || !handler
.fold_range (op_res
, vr_type
, srcvr
, op_vr
))
1980 op_res
.set_varying (vr_type
);
1982 if (ipa_vr_operation_and_type_effects (&res
,
1984 NOP_EXPR
, parm_type
,
1992 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1993 single known constant value and if so, return it. Otherwise return NULL.
1994 NODE and INFO describes the caller node or the one it is inlined to, and
1995 its related info. */
1998 ipa_agg_value_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
1999 const ipa_agg_jf_item
*item
)
2001 tree value
= NULL_TREE
;
2004 if (item
->offset
< 0
2005 || item
->jftype
== IPA_JF_UNKNOWN
2006 || item
->offset
>= (HOST_WIDE_INT
) UINT_MAX
* BITS_PER_UNIT
)
2009 if (item
->jftype
== IPA_JF_CONST
)
2010 return item
->value
.constant
;
2012 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
2013 || item
->jftype
== IPA_JF_LOAD_AGG
);
2015 src_idx
= item
->value
.pass_through
.formal_id
;
2017 if (info
->ipcp_orig_node
)
2019 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2020 value
= info
->known_csts
[src_idx
];
2021 else if (ipcp_transformation
*ts
= ipcp_get_transformation_summary (node
))
2023 ipa_argagg_value_list
avl (ts
);
2024 value
= avl
.get_value (src_idx
,
2025 item
->value
.load_agg
.offset
/ BITS_PER_UNIT
,
2026 item
->value
.load_agg
.by_ref
);
2029 else if (info
->lattices
)
2031 class ipcp_param_lattices
*src_plats
2032 = ipa_get_parm_lattices (info
, src_idx
);
2034 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2036 struct ipcp_lattice
<tree
> *lat
= &src_plats
->itself
;
2038 if (!lat
->is_single_const ())
2041 value
= lat
->values
->value
;
2043 else if (src_plats
->aggs
2044 && !src_plats
->aggs_bottom
2045 && !src_plats
->aggs_contain_variable
2046 && src_plats
->aggs_by_ref
== item
->value
.load_agg
.by_ref
)
2048 struct ipcp_agg_lattice
*aglat
;
2050 for (aglat
= src_plats
->aggs
; aglat
; aglat
= aglat
->next
)
2052 if (aglat
->offset
> item
->value
.load_agg
.offset
)
2055 if (aglat
->offset
== item
->value
.load_agg
.offset
)
2057 if (aglat
->is_single_const ())
2058 value
= aglat
->values
->value
;
2068 if (item
->jftype
== IPA_JF_LOAD_AGG
)
2070 tree load_type
= item
->value
.load_agg
.type
;
2071 tree value_type
= TREE_TYPE (value
);
2073 /* Ensure value type is compatible with load type. */
2074 if (!useless_type_conversion_p (load_type
, value_type
))
2078 return ipa_get_jf_arith_result (item
->value
.pass_through
.operation
,
2080 item
->value
.pass_through
.operand
,
2084 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2085 caller is inlined to) NODE which described by INFO and push the results to
2086 RES as describing values passed in parameter DST_INDEX. */
2089 ipa_push_agg_values_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
2090 ipa_agg_jump_function
*agg_jfunc
,
2092 vec
<ipa_argagg_value
> *res
)
2094 unsigned prev_unit_offset
= 0;
2097 for (const ipa_agg_jf_item
&item
: agg_jfunc
->items
)
2099 tree value
= ipa_agg_value_from_jfunc (info
, node
, &item
);
2103 ipa_argagg_value iav
;
2105 iav
.unit_offset
= item
.offset
/ BITS_PER_UNIT
;
2106 iav
.index
= dst_index
;
2107 iav
.by_ref
= agg_jfunc
->by_ref
;
2110 || iav
.unit_offset
> prev_unit_offset
);
2111 prev_unit_offset
= iav
.unit_offset
;
2114 res
->safe_push (iav
);
2118 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
2119 bottom, not containing a variable component and without any known value at
2123 ipcp_verify_propagated_values (void)
2125 struct cgraph_node
*node
;
2127 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
2129 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
2130 if (!opt_for_fn (node
->decl
, flag_ipa_cp
)
2131 || !opt_for_fn (node
->decl
, optimize
))
2133 int i
, count
= ipa_get_param_count (info
);
2135 for (i
= 0; i
< count
; i
++)
2137 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (info
, i
);
2140 && !lat
->contains_variable
2141 && lat
->values_count
== 0)
2145 symtab
->dump (dump_file
);
2146 fprintf (dump_file
, "\nIPA lattices after constant "
2147 "propagation, before gcc_unreachable:\n");
2148 print_all_lattices (dump_file
, true, false);
2157 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2160 values_equal_for_ipcp_p (ipa_polymorphic_call_context x
,
2161 ipa_polymorphic_call_context y
)
2163 return x
.equal_to (y
);
2167 /* Add a new value source to the value represented by THIS, marking that a
2168 value comes from edge CS and (if the underlying jump function is a
2169 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2170 parameter described by SRC_INDEX. OFFSET is negative if the source was the
2171 scalar value of the parameter itself or the offset within an aggregate. */
2173 template <typename valtype
>
2175 ipcp_value
<valtype
>::add_source (cgraph_edge
*cs
, ipcp_value
*src_val
,
2176 int src_idx
, HOST_WIDE_INT offset
)
2178 ipcp_value_source
<valtype
> *src
;
2180 src
= new (ipcp_sources_pool
.allocate ()) ipcp_value_source
<valtype
>;
2181 src
->offset
= offset
;
2184 src
->index
= src_idx
;
2186 src
->next
= sources
;
2190 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2191 SOURCE and clear all other fields. */
2193 static ipcp_value
<tree
> *
2194 allocate_and_init_ipcp_value (tree cst
, unsigned same_lat_gen_level
)
2196 ipcp_value
<tree
> *val
;
2198 val
= new (ipcp_cst_values_pool
.allocate ()) ipcp_value
<tree
>();
2200 val
->self_recursion_generated_level
= same_lat_gen_level
;
2204 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2205 value to SOURCE and clear all other fields. */
2207 static ipcp_value
<ipa_polymorphic_call_context
> *
2208 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx
,
2209 unsigned same_lat_gen_level
)
2211 ipcp_value
<ipa_polymorphic_call_context
> *val
;
2213 val
= new (ipcp_poly_ctx_values_pool
.allocate ())
2214 ipcp_value
<ipa_polymorphic_call_context
>();
2216 val
->self_recursion_generated_level
= same_lat_gen_level
;
2220 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2221 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
2222 meaning. OFFSET -1 means the source is scalar and not a part of an
2223 aggregate. If non-NULL, VAL_P records address of existing or newly added
2226 If the value is generated for a self-recursive call as a result of an
2227 arithmetic pass-through jump-function acting on a value in the same lattice,
2228 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2229 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2231 template <typename valtype
>
2233 ipcp_lattice
<valtype
>::add_value (valtype newval
, cgraph_edge
*cs
,
2234 ipcp_value
<valtype
> *src_val
,
2235 int src_idx
, HOST_WIDE_INT offset
,
2236 ipcp_value
<valtype
> **val_p
,
2237 unsigned same_lat_gen_level
)
2239 ipcp_value
<valtype
> *val
, *last_val
= NULL
;
2247 for (val
= values
; val
; last_val
= val
, val
= val
->next
)
2248 if (values_equal_for_ipcp_p (val
->value
, newval
))
2253 if (val
->self_recursion_generated_level
< same_lat_gen_level
)
2254 val
->self_recursion_generated_level
= same_lat_gen_level
;
2256 if (ipa_edge_within_scc (cs
))
2258 ipcp_value_source
<valtype
> *s
;
2259 for (s
= val
->sources
; s
; s
= s
->next
)
2260 if (s
->cs
== cs
&& s
->val
== src_val
)
2266 val
->add_source (cs
, src_val
, src_idx
, offset
);
2270 if (!same_lat_gen_level
&& values_count
== opt_for_fn (cs
->caller
->decl
,
2271 param_ipa_cp_value_list_size
))
2273 /* We can only free sources, not the values themselves, because sources
2274 of other values in this SCC might point to them. */
2275 for (val
= values
; val
; val
= val
->next
)
2277 while (val
->sources
)
2279 ipcp_value_source
<valtype
> *src
= val
->sources
;
2280 val
->sources
= src
->next
;
2281 ipcp_sources_pool
.remove ((ipcp_value_source
<tree
>*)src
);
2285 return set_to_bottom ();
2289 val
= allocate_and_init_ipcp_value (newval
, same_lat_gen_level
);
2290 val
->add_source (cs
, src_val
, src_idx
, offset
);
2293 /* Add the new value to end of value list, which can reduce iterations
2294 of propagation stage for recursive function. */
2296 last_val
->next
= val
;
2306 /* A helper function that returns result of operation specified by OPCODE on
2307 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2308 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2309 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2313 get_val_across_arith_op (enum tree_code opcode
,
2316 ipcp_value
<tree
> *src_val
,
2319 tree opnd1
= src_val
->value
;
2321 /* Skip source values that is incompatible with specified type. */
2323 && !useless_type_conversion_p (opnd1_type
, TREE_TYPE (opnd1
)))
2326 return ipa_get_jf_arith_result (opcode
, opnd1
, opnd2
, res_type
);
2329 /* Propagate values through an arithmetic transformation described by a jump
2330 function associated with edge CS, taking values from SRC_LAT and putting
2331 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2332 OPND2 is a constant value if transformation is a binary operation.
2333 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2334 a part of the aggregate. SRC_IDX is the index of the source parameter.
2335 RES_TYPE is the value type of result being propagated into. Return true if
2336 DEST_LAT changed. */
2339 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2340 enum tree_code opcode
,
2343 ipcp_lattice
<tree
> *src_lat
,
2344 ipcp_lattice
<tree
> *dest_lat
,
2345 HOST_WIDE_INT src_offset
,
2349 ipcp_value
<tree
> *src_val
;
2352 /* Due to circular dependencies, propagating within an SCC through arithmetic
2353 transformation would create infinite number of values. But for
2354 self-feeding recursive function, we could allow propagation in a limited
2355 count, and this can enable a simple kind of recursive function versioning.
2356 For other scenario, we would just make lattices bottom. */
2357 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2361 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2362 param_ipa_cp_max_recursive_depth
);
2363 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2364 return dest_lat
->set_contains_variable ();
2366 /* No benefit if recursive execution is in low probability. */
2367 if (cs
->sreal_frequency () * 100
2368 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2369 param_ipa_cp_min_recursive_probability
))
2370 return dest_lat
->set_contains_variable ();
2372 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
2374 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2376 /* Now we do not use self-recursively generated value as propagation
2377 source, this is absolutely conservative, but could avoid explosion
2378 of lattice's value space, especially when one recursive function
2379 calls another recursive. */
2380 if (src_val
->self_recursion_generated_p ())
2382 ipcp_value_source
<tree
> *s
;
2384 /* If the lattice has already been propagated for the call site,
2385 no need to do that again. */
2386 for (s
= src_val
->sources
; s
; s
= s
->next
)
2388 return dest_lat
->set_contains_variable ();
2391 val_seeds
.safe_push (src_val
);
2394 gcc_assert ((int) val_seeds
.length () <= param_ipa_cp_value_list_size
);
2396 /* Recursively generate lattice values with a limited count. */
2397 FOR_EACH_VEC_ELT (val_seeds
, i
, src_val
)
2399 for (int j
= 1; j
< max_recursive_depth
; j
++)
2401 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2404 || !ipacp_value_safe_for_type (res_type
, cstval
))
2407 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2408 src_offset
, &src_val
, j
);
2409 gcc_checking_assert (src_val
);
2412 ret
|= dest_lat
->set_contains_variable ();
2415 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2417 /* Now we do not use self-recursively generated value as propagation
2418 source, otherwise it is easy to make value space of normal lattice
2420 if (src_val
->self_recursion_generated_p ())
2422 ret
|= dest_lat
->set_contains_variable ();
2426 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2429 && ipacp_value_safe_for_type (res_type
, cstval
))
2430 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2433 ret
|= dest_lat
->set_contains_variable ();
2439 /* Propagate values through a pass-through jump function JFUNC associated with
2440 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2441 is the index of the source parameter. PARM_TYPE is the type of the
2442 parameter to which the result is passed. */
2445 propagate_vals_across_pass_through (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2446 ipcp_lattice
<tree
> *src_lat
,
2447 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2450 return propagate_vals_across_arith_jfunc (cs
,
2451 ipa_get_jf_pass_through_operation (jfunc
),
2453 ipa_get_jf_pass_through_operand (jfunc
),
2454 src_lat
, dest_lat
, -1, src_idx
, parm_type
);
2457 /* Propagate values through an ancestor jump function JFUNC associated with
2458 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2459 is the index of the source parameter. */
2462 propagate_vals_across_ancestor (struct cgraph_edge
*cs
,
2463 struct ipa_jump_func
*jfunc
,
2464 ipcp_lattice
<tree
> *src_lat
,
2465 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2468 ipcp_value
<tree
> *src_val
;
2471 if (ipa_edge_within_scc (cs
))
2472 return dest_lat
->set_contains_variable ();
2474 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2476 tree t
= ipa_get_jf_ancestor_result (jfunc
, src_val
->value
);
2478 if (t
&& ipacp_value_safe_for_type (param_type
, t
))
2479 ret
|= dest_lat
->add_value (t
, cs
, src_val
, src_idx
);
2481 ret
|= dest_lat
->set_contains_variable ();
2487 /* Propagate scalar values across jump function JFUNC that is associated with
2488 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2489 parameter to which the result is passed. */
2492 propagate_scalar_across_jump_function (struct cgraph_edge
*cs
,
2493 struct ipa_jump_func
*jfunc
,
2494 ipcp_lattice
<tree
> *dest_lat
,
2497 if (dest_lat
->bottom
)
2500 if (jfunc
->type
== IPA_JF_CONST
)
2502 tree val
= ipa_get_jf_constant (jfunc
);
2503 if (ipacp_value_safe_for_type (param_type
, val
))
2504 return dest_lat
->add_value (val
, cs
, NULL
, 0);
2506 return dest_lat
->set_contains_variable ();
2508 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
2509 || jfunc
->type
== IPA_JF_ANCESTOR
)
2511 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2512 ipcp_lattice
<tree
> *src_lat
;
2516 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2517 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2519 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2521 src_lat
= ipa_get_scalar_lat (caller_info
, src_idx
);
2522 if (src_lat
->bottom
)
2523 return dest_lat
->set_contains_variable ();
2525 /* If we would need to clone the caller and cannot, do not propagate. */
2526 if (!ipcp_versionable_function_p (cs
->caller
)
2527 && (src_lat
->contains_variable
2528 || (src_lat
->values_count
> 1)))
2529 return dest_lat
->set_contains_variable ();
2531 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2532 ret
= propagate_vals_across_pass_through (cs
, jfunc
, src_lat
,
2536 ret
= propagate_vals_across_ancestor (cs
, jfunc
, src_lat
, dest_lat
,
2537 src_idx
, param_type
);
2539 if (src_lat
->contains_variable
)
2540 ret
|= dest_lat
->set_contains_variable ();
2545 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2546 use it for indirect inlining), we should propagate them too. */
2547 return dest_lat
->set_contains_variable ();
2550 /* Propagate scalar values across jump function JFUNC that is associated with
2551 edge CS and describes argument IDX and put the values into DEST_LAT. */
2554 propagate_context_across_jump_function (cgraph_edge
*cs
,
2555 ipa_jump_func
*jfunc
, int idx
,
2556 ipcp_lattice
<ipa_polymorphic_call_context
> *dest_lat
)
2558 if (dest_lat
->bottom
)
2560 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
2562 bool added_sth
= false;
2563 bool type_preserved
= true;
2565 ipa_polymorphic_call_context edge_ctx
, *edge_ctx_ptr
2566 = ipa_get_ith_polymorhic_call_context (args
, idx
);
2569 edge_ctx
= *edge_ctx_ptr
;
2571 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2572 || jfunc
->type
== IPA_JF_ANCESTOR
)
2574 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2576 ipcp_lattice
<ipa_polymorphic_call_context
> *src_lat
;
2578 /* TODO: Once we figure out how to propagate speculations, it will
2579 probably be a good idea to switch to speculation if type_preserved is
2580 not set instead of punting. */
2581 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2583 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
2585 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
2586 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2590 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
2591 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2594 src_lat
= ipa_get_poly_ctx_lat (caller_info
, src_idx
);
2595 /* If we would need to clone the caller and cannot, do not propagate. */
2596 if (!ipcp_versionable_function_p (cs
->caller
)
2597 && (src_lat
->contains_variable
2598 || (src_lat
->values_count
> 1)))
2601 ipcp_value
<ipa_polymorphic_call_context
> *src_val
;
2602 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2604 ipa_polymorphic_call_context cur
= src_val
->value
;
2606 if (!type_preserved
)
2607 cur
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
2608 if (jfunc
->type
== IPA_JF_ANCESTOR
)
2609 cur
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
2610 /* TODO: In cases we know how the context is going to be used,
2611 we can improve the result by passing proper OTR_TYPE. */
2612 cur
.combine_with (edge_ctx
);
2613 if (!cur
.useless_p ())
2615 if (src_lat
->contains_variable
2616 && !edge_ctx
.equal_to (cur
))
2617 ret
|= dest_lat
->set_contains_variable ();
2618 ret
|= dest_lat
->add_value (cur
, cs
, src_val
, src_idx
);
2627 if (!edge_ctx
.useless_p ())
2628 ret
|= dest_lat
->add_value (edge_ctx
, cs
);
2630 ret
|= dest_lat
->set_contains_variable ();
2636 /* Propagate bits across jfunc that is associated with
2637 edge cs and update dest_lattice accordingly. */
2640 propagate_bits_across_jump_function (cgraph_edge
*cs
, int idx
,
2641 ipa_jump_func
*jfunc
,
2642 ipcp_bits_lattice
*dest_lattice
)
2644 if (dest_lattice
->bottom_p ())
2647 enum availability availability
;
2648 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
2649 ipa_node_params
*callee_info
= ipa_node_params_sum
->get (callee
);
2650 tree parm_type
= ipa_get_type (callee_info
, idx
);
2652 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2653 transform for these cases. Similarly, we can have bad type mismatches
2654 with LTO, avoid doing anything with those too. */
2656 || (!INTEGRAL_TYPE_P (parm_type
) && !POINTER_TYPE_P (parm_type
)))
2658 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2659 fprintf (dump_file
, "Setting dest_lattice to bottom, because type of "
2660 "param %i of %s is NULL or unsuitable for bits propagation\n",
2661 idx
, cs
->callee
->dump_name ());
2663 return dest_lattice
->set_to_bottom ();
2666 unsigned precision
= TYPE_PRECISION (parm_type
);
2667 signop sgn
= TYPE_SIGN (parm_type
);
2669 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2670 || jfunc
->type
== IPA_JF_ANCESTOR
)
2672 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2673 tree operand
= NULL_TREE
;
2674 enum tree_code code
;
2676 bool keep_null
= false;
2678 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2680 code
= ipa_get_jf_pass_through_operation (jfunc
);
2681 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2682 if (code
!= NOP_EXPR
)
2683 operand
= ipa_get_jf_pass_through_operand (jfunc
);
2687 code
= POINTER_PLUS_EXPR
;
2688 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2689 unsigned HOST_WIDE_INT offset
2690 = ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
2691 keep_null
= (ipa_get_jf_ancestor_keep_null (jfunc
) || !offset
);
2692 operand
= build_int_cstu (size_type_node
, offset
);
2695 class ipcp_param_lattices
*src_lats
2696 = ipa_get_parm_lattices (caller_info
, src_idx
);
2698 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2704 Assume lattice for x is bottom, however we can still propagate
2705 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2706 and we store it in jump function during analysis stage. */
2708 if (!src_lats
->bits_lattice
.bottom_p ())
2711 = keep_null
&& !src_lats
->bits_lattice
.known_nonzero_p ();
2713 return dest_lattice
->meet_with (src_lats
->bits_lattice
, precision
,
2714 sgn
, code
, operand
, drop_all_ones
);
2719 return dest_lattice
->meet_with (jfunc
->bits
->value
, jfunc
->bits
->mask
,
2722 return dest_lattice
->set_to_bottom ();
2725 /* Propagate value range across jump function JFUNC that is associated with
2726 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2730 propagate_vr_across_jump_function (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2731 class ipcp_param_lattices
*dest_plats
,
2734 ipcp_vr_lattice
*dest_lat
= &dest_plats
->m_value_range
;
2736 if (dest_lat
->bottom_p ())
2740 || (!INTEGRAL_TYPE_P (param_type
)
2741 && !POINTER_TYPE_P (param_type
)))
2742 return dest_lat
->set_to_bottom ();
2744 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2746 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
2747 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2748 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2749 class ipcp_param_lattices
*src_lats
2750 = ipa_get_parm_lattices (caller_info
, src_idx
);
2751 tree operand_type
= ipa_get_type (caller_info
, src_idx
);
2753 if (src_lats
->m_value_range
.bottom_p ())
2754 return dest_lat
->set_to_bottom ();
2757 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
2758 ipa_vr_operation_and_type_effects (&vr
,
2759 &src_lats
->m_value_range
.m_vr
,
2760 operation
, param_type
,
2762 /* A crude way to prevent unbounded number of value range updates
2763 in SCC components. We should allow limited number of updates within
2765 else if (!ipa_edge_within_scc (cs
))
2767 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
2769 value_range op_res
,res
;
2770 range_op_handler
handler (operation
, operand_type
);
2772 ipa_range_set_and_normalize (op_vr
, op
);
2775 || !op_res
.supports_type_p (operand_type
)
2776 || !handler
.fold_range (op_res
, operand_type
,
2777 src_lats
->m_value_range
.m_vr
, op_vr
))
2778 op_res
.set_varying (operand_type
);
2780 ipa_vr_operation_and_type_effects (&vr
,
2782 NOP_EXPR
, param_type
,
2785 if (!vr
.undefined_p () && !vr
.varying_p ())
2790 if (ipa_vr_operation_and_type_effects (&jvr
, jfunc
->m_vr
,
2793 jfunc
->m_vr
->type ()))
2796 return dest_lat
->meet_with (&vr
);
2799 else if (jfunc
->type
== IPA_JF_CONST
)
2801 tree val
= ipa_get_jf_constant (jfunc
);
2802 if (TREE_CODE (val
) == INTEGER_CST
)
2804 val
= fold_convert (param_type
, val
);
2805 if (TREE_OVERFLOW_P (val
))
2806 val
= drop_tree_overflow (val
);
2808 value_range
tmpvr (TREE_TYPE (val
),
2809 wi::to_wide (val
), wi::to_wide (val
));
2810 return dest_lat
->meet_with (&tmpvr
);
2816 && ipa_vr_operation_and_type_effects (&vr
, jfunc
->m_vr
, NOP_EXPR
,
2818 jfunc
->m_vr
->type ()))
2819 return dest_lat
->meet_with (&vr
);
2821 return dest_lat
->set_to_bottom ();
2824 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2825 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2826 other cases, return false). If there are no aggregate items, set
2827 aggs_by_ref to NEW_AGGS_BY_REF. */
2830 set_check_aggs_by_ref (class ipcp_param_lattices
*dest_plats
,
2831 bool new_aggs_by_ref
)
2833 if (dest_plats
->aggs
)
2835 if (dest_plats
->aggs_by_ref
!= new_aggs_by_ref
)
2837 set_agg_lats_to_bottom (dest_plats
);
2842 dest_plats
->aggs_by_ref
= new_aggs_by_ref
;
2846 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2847 already existing lattice for the given OFFSET and SIZE, marking all skipped
2848 lattices as containing variable and checking for overlaps. If there is no
2849 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2850 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2851 unless there are too many already. If there are two many, return false. If
2852 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2853 skipped lattices were newly marked as containing variable, set *CHANGE to
2854 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2857 merge_agg_lats_step (class ipcp_param_lattices
*dest_plats
,
2858 HOST_WIDE_INT offset
, HOST_WIDE_INT val_size
,
2859 struct ipcp_agg_lattice
***aglat
,
2860 bool pre_existing
, bool *change
, int max_agg_items
)
2862 gcc_checking_assert (offset
>= 0);
2864 while (**aglat
&& (**aglat
)->offset
< offset
)
2866 if ((**aglat
)->offset
+ (**aglat
)->size
> offset
)
2868 set_agg_lats_to_bottom (dest_plats
);
2871 *change
|= (**aglat
)->set_contains_variable ();
2872 *aglat
= &(**aglat
)->next
;
2875 if (**aglat
&& (**aglat
)->offset
== offset
)
2877 if ((**aglat
)->size
!= val_size
)
2879 set_agg_lats_to_bottom (dest_plats
);
2882 gcc_assert (!(**aglat
)->next
2883 || (**aglat
)->next
->offset
>= offset
+ val_size
);
2888 struct ipcp_agg_lattice
*new_al
;
2890 if (**aglat
&& (**aglat
)->offset
< offset
+ val_size
)
2892 set_agg_lats_to_bottom (dest_plats
);
2895 if (dest_plats
->aggs_count
== max_agg_items
)
2897 dest_plats
->aggs_count
++;
2898 new_al
= ipcp_agg_lattice_pool
.allocate ();
2899 memset (new_al
, 0, sizeof (*new_al
));
2901 new_al
->offset
= offset
;
2902 new_al
->size
= val_size
;
2903 new_al
->contains_variable
= pre_existing
;
2905 new_al
->next
= **aglat
;
2911 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2912 containing an unknown value. */
2915 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice
*aglat
)
2920 ret
|= aglat
->set_contains_variable ();
2921 aglat
= aglat
->next
;
2926 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2927 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2928 parameter used for lattice value sources. Return true if DEST_PLATS changed
2932 merge_aggregate_lattices (struct cgraph_edge
*cs
,
2933 class ipcp_param_lattices
*dest_plats
,
2934 class ipcp_param_lattices
*src_plats
,
2935 int src_idx
, HOST_WIDE_INT offset_delta
)
2937 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2938 struct ipcp_agg_lattice
**dst_aglat
;
2941 if (set_check_aggs_by_ref (dest_plats
, src_plats
->aggs_by_ref
))
2943 if (src_plats
->aggs_bottom
)
2944 return set_agg_lats_contain_variable (dest_plats
);
2945 if (src_plats
->aggs_contain_variable
)
2946 ret
|= set_agg_lats_contain_variable (dest_plats
);
2947 dst_aglat
= &dest_plats
->aggs
;
2949 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2950 param_ipa_max_agg_items
);
2951 for (struct ipcp_agg_lattice
*src_aglat
= src_plats
->aggs
;
2953 src_aglat
= src_aglat
->next
)
2955 HOST_WIDE_INT new_offset
= src_aglat
->offset
- offset_delta
;
2959 if (merge_agg_lats_step (dest_plats
, new_offset
, src_aglat
->size
,
2960 &dst_aglat
, pre_existing
, &ret
, max_agg_items
))
2962 struct ipcp_agg_lattice
*new_al
= *dst_aglat
;
2964 dst_aglat
= &(*dst_aglat
)->next
;
2965 if (src_aglat
->bottom
)
2967 ret
|= new_al
->set_contains_variable ();
2970 if (src_aglat
->contains_variable
)
2971 ret
|= new_al
->set_contains_variable ();
2972 for (ipcp_value
<tree
> *val
= src_aglat
->values
;
2975 ret
|= new_al
->add_value (val
->value
, cs
, val
, src_idx
,
2978 else if (dest_plats
->aggs_bottom
)
2981 ret
|= set_chain_of_aglats_contains_variable (*dst_aglat
);
2985 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2986 pass-through JFUNC and if so, whether it has conform and conforms to the
2987 rules about propagating values passed by reference. */
2990 agg_pass_through_permissible_p (class ipcp_param_lattices
*src_plats
,
2991 struct ipa_jump_func
*jfunc
)
2993 return src_plats
->aggs
2994 && (!src_plats
->aggs_by_ref
2995 || ipa_get_jf_pass_through_agg_preserved (jfunc
));
2998 /* Propagate values through ITEM, jump function for a part of an aggregate,
2999 into corresponding aggregate lattice AGLAT. CS is the call graph edge
3000 associated with the jump function. Return true if AGLAT changed in any
3004 propagate_aggregate_lattice (struct cgraph_edge
*cs
,
3005 struct ipa_agg_jf_item
*item
,
3006 struct ipcp_agg_lattice
*aglat
)
3008 class ipa_node_params
*caller_info
;
3009 class ipcp_param_lattices
*src_plats
;
3010 struct ipcp_lattice
<tree
> *src_lat
;
3011 HOST_WIDE_INT src_offset
;
3016 if (item
->jftype
== IPA_JF_CONST
)
3018 tree value
= item
->value
.constant
;
3020 gcc_checking_assert (is_gimple_ip_invariant (value
));
3021 return aglat
->add_value (value
, cs
, NULL
, 0);
3024 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
3025 || item
->jftype
== IPA_JF_LOAD_AGG
);
3027 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
3028 src_idx
= item
->value
.pass_through
.formal_id
;
3029 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
3031 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
3033 load_type
= NULL_TREE
;
3034 src_lat
= &src_plats
->itself
;
3039 HOST_WIDE_INT load_offset
= item
->value
.load_agg
.offset
;
3040 struct ipcp_agg_lattice
*src_aglat
;
3042 for (src_aglat
= src_plats
->aggs
; src_aglat
; src_aglat
= src_aglat
->next
)
3043 if (src_aglat
->offset
>= load_offset
)
3046 load_type
= item
->value
.load_agg
.type
;
3048 || src_aglat
->offset
> load_offset
3049 || src_aglat
->size
!= tree_to_shwi (TYPE_SIZE (load_type
))
3050 || src_plats
->aggs_by_ref
!= item
->value
.load_agg
.by_ref
)
3051 return aglat
->set_contains_variable ();
3053 src_lat
= src_aglat
;
3054 src_offset
= load_offset
;
3058 || (!ipcp_versionable_function_p (cs
->caller
)
3059 && !src_lat
->is_single_const ()))
3060 return aglat
->set_contains_variable ();
3062 ret
= propagate_vals_across_arith_jfunc (cs
,
3063 item
->value
.pass_through
.operation
,
3065 item
->value
.pass_through
.operand
,
3071 if (src_lat
->contains_variable
)
3072 ret
|= aglat
->set_contains_variable ();
3077 /* Propagate scalar values across jump function JFUNC that is associated with
3078 edge CS and put the values into DEST_LAT. */
3081 propagate_aggs_across_jump_function (struct cgraph_edge
*cs
,
3082 struct ipa_jump_func
*jfunc
,
3083 class ipcp_param_lattices
*dest_plats
)
3087 if (dest_plats
->aggs_bottom
)
3090 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3091 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3093 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
3094 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
3095 class ipcp_param_lattices
*src_plats
;
3097 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
3098 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
3100 /* Currently we do not produce clobber aggregate jump
3101 functions, replace with merging when we do. */
3102 gcc_assert (!jfunc
->agg
.items
);
3103 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
,
3108 else if (jfunc
->type
== IPA_JF_ANCESTOR
3109 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
3111 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
3112 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
3113 class ipcp_param_lattices
*src_plats
;
3115 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
3116 if (src_plats
->aggs
&& src_plats
->aggs_by_ref
)
3118 /* Currently we do not produce clobber aggregate jump
3119 functions, replace with merging when we do. */
3120 gcc_assert (!jfunc
->agg
.items
);
3121 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
, src_idx
,
3122 ipa_get_jf_ancestor_offset (jfunc
));
3124 else if (!src_plats
->aggs_by_ref
)
3125 ret
|= set_agg_lats_to_bottom (dest_plats
);
3127 ret
|= set_agg_lats_contain_variable (dest_plats
);
3131 if (jfunc
->agg
.items
)
3133 bool pre_existing
= dest_plats
->aggs
!= NULL
;
3134 struct ipcp_agg_lattice
**aglat
= &dest_plats
->aggs
;
3135 struct ipa_agg_jf_item
*item
;
3138 if (set_check_aggs_by_ref (dest_plats
, jfunc
->agg
.by_ref
))
3141 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
3142 param_ipa_max_agg_items
);
3143 FOR_EACH_VEC_ELT (*jfunc
->agg
.items
, i
, item
)
3145 HOST_WIDE_INT val_size
;
3147 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
3149 val_size
= tree_to_shwi (TYPE_SIZE (item
->type
));
3151 if (merge_agg_lats_step (dest_plats
, item
->offset
, val_size
,
3152 &aglat
, pre_existing
, &ret
, max_agg_items
))
3154 ret
|= propagate_aggregate_lattice (cs
, item
, *aglat
);
3155 aglat
= &(*aglat
)->next
;
3157 else if (dest_plats
->aggs_bottom
)
3161 ret
|= set_chain_of_aglats_contains_variable (*aglat
);
3164 ret
|= set_agg_lats_contain_variable (dest_plats
);
3169 /* Return true if on the way cfrom CS->caller to the final (non-alias and
3170 non-thunk) destination, the call passes through a thunk. */
3173 call_passes_through_thunk (cgraph_edge
*cs
)
3175 cgraph_node
*alias_or_thunk
= cs
->callee
;
3176 while (alias_or_thunk
->alias
)
3177 alias_or_thunk
= alias_or_thunk
->get_alias_target ();
3178 return alias_or_thunk
->thunk
;
3181 /* Propagate constants from the caller to the callee of CS. INFO describes the
3185 propagate_constants_across_call (struct cgraph_edge
*cs
)
3187 class ipa_node_params
*callee_info
;
3188 enum availability availability
;
3189 cgraph_node
*callee
;
3190 class ipa_edge_args
*args
;
3192 int i
, args_count
, parms_count
;
3194 callee
= cs
->callee
->function_symbol (&availability
);
3195 if (!callee
->definition
)
3197 gcc_checking_assert (callee
->has_gimple_body_p ());
3198 callee_info
= ipa_node_params_sum
->get (callee
);
3202 args
= ipa_edge_args_sum
->get (cs
);
3203 parms_count
= ipa_get_param_count (callee_info
);
3204 if (parms_count
== 0)
3207 || !opt_for_fn (cs
->caller
->decl
, flag_ipa_cp
)
3208 || !opt_for_fn (cs
->caller
->decl
, optimize
))
3210 for (i
= 0; i
< parms_count
; i
++)
3211 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
3215 args_count
= ipa_get_cs_argument_count (args
);
3217 /* If this call goes through a thunk we must not propagate to the first (0th)
3218 parameter. However, we might need to uncover a thunk from below a series
3219 of aliases first. */
3220 if (call_passes_through_thunk (cs
))
3222 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
3229 for (; (i
< args_count
) && (i
< parms_count
); i
++)
3231 struct ipa_jump_func
*jump_func
= ipa_get_ith_jump_func (args
, i
);
3232 class ipcp_param_lattices
*dest_plats
;
3233 tree param_type
= ipa_get_type (callee_info
, i
);
3235 dest_plats
= ipa_get_parm_lattices (callee_info
, i
);
3236 if (availability
== AVAIL_INTERPOSABLE
)
3237 ret
|= set_all_contains_variable (dest_plats
);
3240 ret
|= propagate_scalar_across_jump_function (cs
, jump_func
,
3241 &dest_plats
->itself
,
3243 ret
|= propagate_context_across_jump_function (cs
, jump_func
, i
,
3244 &dest_plats
->ctxlat
);
3246 |= propagate_bits_across_jump_function (cs
, i
, jump_func
,
3247 &dest_plats
->bits_lattice
);
3248 ret
|= propagate_aggs_across_jump_function (cs
, jump_func
,
3250 if (opt_for_fn (callee
->decl
, flag_ipa_vrp
))
3251 ret
|= propagate_vr_across_jump_function (cs
, jump_func
,
3252 dest_plats
, param_type
);
3254 ret
|= dest_plats
->m_value_range
.set_to_bottom ();
3257 for (; i
< parms_count
; i
++)
3258 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
, i
));
3263 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
3264 KNOWN_CONTEXTS, and known aggregates either in AVS or KNOWN_AGGS return
3265 the destination. The latter three can be NULL. If AGG_REPS is not NULL,
3266 KNOWN_AGGS is ignored. */
3269 ipa_get_indirect_edge_target_1 (struct cgraph_edge
*ie
,
3270 const vec
<tree
> &known_csts
,
3271 const vec
<ipa_polymorphic_call_context
> &known_contexts
,
3272 const ipa_argagg_value_list
&avs
,
3275 int param_index
= ie
->indirect_info
->param_index
;
3276 HOST_WIDE_INT anc_offset
;
3280 *speculative
= false;
3282 if (param_index
== -1)
3285 if (!ie
->indirect_info
->polymorphic
)
3289 if (ie
->indirect_info
->agg_contents
)
3292 if ((unsigned) param_index
< known_csts
.length ()
3293 && known_csts
[param_index
])
3294 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3295 ie
->indirect_info
->offset
,
3296 ie
->indirect_info
->by_ref
);
3298 if (!t
&& ie
->indirect_info
->guaranteed_unmodified
)
3299 t
= avs
.get_value (param_index
,
3300 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3301 ie
->indirect_info
->by_ref
);
3303 else if ((unsigned) param_index
< known_csts
.length ())
3304 t
= known_csts
[param_index
];
3307 && TREE_CODE (t
) == ADDR_EXPR
3308 && TREE_CODE (TREE_OPERAND (t
, 0)) == FUNCTION_DECL
)
3309 return TREE_OPERAND (t
, 0);
3314 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize
))
3317 gcc_assert (!ie
->indirect_info
->agg_contents
);
3318 gcc_assert (!ie
->indirect_info
->by_ref
);
3319 anc_offset
= ie
->indirect_info
->offset
;
3323 if ((unsigned) param_index
< known_csts
.length ()
3324 && known_csts
[param_index
])
3325 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3326 ie
->indirect_info
->offset
, true);
3328 /* Try to work out value of virtual table pointer value in replacements. */
3329 /* or known aggregate values. */
3331 t
= avs
.get_value (param_index
,
3332 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3335 /* If we found the virtual table pointer, lookup the target. */
3339 unsigned HOST_WIDE_INT offset
;
3340 if (vtable_pointer_value_to_vtable (t
, &vtable
, &offset
))
3343 target
= gimple_get_virt_method_for_vtable (ie
->indirect_info
->otr_token
,
3344 vtable
, offset
, &can_refer
);
3348 || fndecl_built_in_p (target
, BUILT_IN_UNREACHABLE
)
3349 || !possible_polymorphic_call_target_p
3350 (ie
, cgraph_node::get (target
)))
3352 /* Do not speculate builtin_unreachable, it is stupid! */
3353 if (ie
->indirect_info
->vptr_changed
)
3355 target
= ipa_impossible_devirt_target (ie
, target
);
3357 *speculative
= ie
->indirect_info
->vptr_changed
;
3364 /* Do we know the constant value of pointer? */
3365 if (!t
&& (unsigned) param_index
< known_csts
.length ())
3366 t
= known_csts
[param_index
];
3368 gcc_checking_assert (!t
|| TREE_CODE (t
) != TREE_BINFO
);
3370 ipa_polymorphic_call_context context
;
3371 if (known_contexts
.length () > (unsigned int) param_index
)
3373 context
= known_contexts
[param_index
];
3374 context
.offset_by (anc_offset
);
3375 if (ie
->indirect_info
->vptr_changed
)
3376 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3377 ie
->indirect_info
->otr_type
);
3380 ipa_polymorphic_call_context ctx2
= ipa_polymorphic_call_context
3381 (t
, ie
->indirect_info
->otr_type
, anc_offset
);
3382 if (!ctx2
.useless_p ())
3383 context
.combine_with (ctx2
, ie
->indirect_info
->otr_type
);
3388 context
= ipa_polymorphic_call_context (t
, ie
->indirect_info
->otr_type
,
3390 if (ie
->indirect_info
->vptr_changed
)
3391 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3392 ie
->indirect_info
->otr_type
);
3397 vec
<cgraph_node
*>targets
;
3400 targets
= possible_polymorphic_call_targets
3401 (ie
->indirect_info
->otr_type
,
3402 ie
->indirect_info
->otr_token
,
3404 if (!final
|| targets
.length () > 1)
3406 struct cgraph_node
*node
;
3409 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize_speculatively
)
3410 || ie
->speculative
|| !ie
->maybe_hot_p ())
3412 node
= try_speculative_devirtualization (ie
->indirect_info
->otr_type
,
3413 ie
->indirect_info
->otr_token
,
3417 *speculative
= true;
3418 target
= node
->decl
;
3425 *speculative
= false;
3426 if (targets
.length () == 1)
3427 target
= targets
[0]->decl
;
3429 target
= ipa_impossible_devirt_target (ie
, NULL_TREE
);
3432 if (target
&& !possible_polymorphic_call_target_p (ie
,
3433 cgraph_node::get (target
)))
3437 target
= ipa_impossible_devirt_target (ie
, target
);
3443 /* If an indirect edge IE can be turned into a direct one based on data in
3444 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3445 whether the discovered target is only speculative guess. */
3448 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3449 ipa_call_arg_values
*avals
,
3452 ipa_argagg_value_list
avl (avals
);
3453 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3454 avals
->m_known_contexts
,
3458 /* Calculate devirtualization time bonus for NODE, assuming we know information
3459 about arguments stored in AVALS. */
3462 devirtualization_time_bonus (struct cgraph_node
*node
,
3463 ipa_auto_call_arg_values
*avals
)
3465 struct cgraph_edge
*ie
;
3468 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
3470 struct cgraph_node
*callee
;
3471 class ipa_fn_summary
*isummary
;
3472 enum availability avail
;
3476 ipa_argagg_value_list
avl (avals
);
3477 target
= ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3478 avals
->m_known_contexts
,
3483 /* Only bare minimum benefit for clearly un-inlineable targets. */
3485 callee
= cgraph_node::get (target
);
3486 if (!callee
|| !callee
->definition
)
3488 callee
= callee
->function_symbol (&avail
);
3489 if (avail
< AVAIL_AVAILABLE
)
3491 isummary
= ipa_fn_summaries
->get (callee
);
3492 if (!isummary
|| !isummary
->inlinable
)
3495 int size
= ipa_size_summaries
->get (callee
)->size
;
3496 /* FIXME: The values below need re-considering and perhaps also
3497 integrating into the cost metrics, at lest in some very basic way. */
3498 int max_inline_insns_auto
3499 = opt_for_fn (callee
->decl
, param_max_inline_insns_auto
);
3500 if (size
<= max_inline_insns_auto
/ 4)
3501 res
+= 31 / ((int)speculative
+ 1);
3502 else if (size
<= max_inline_insns_auto
/ 2)
3503 res
+= 15 / ((int)speculative
+ 1);
3504 else if (size
<= max_inline_insns_auto
3505 || DECL_DECLARED_INLINE_P (callee
->decl
))
3506 res
+= 7 / ((int)speculative
+ 1);
3512 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3515 hint_time_bonus (cgraph_node
*node
, const ipa_call_estimates
&estimates
)
3518 ipa_hints hints
= estimates
.hints
;
3519 if (hints
& (INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride
))
3520 result
+= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3522 sreal bonus_for_one
= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3524 if (hints
& INLINE_HINT_loop_iterations
)
3525 result
+= (estimates
.loops_with_known_iterations
* bonus_for_one
).to_int ();
3527 if (hints
& INLINE_HINT_loop_stride
)
3528 result
+= (estimates
.loops_with_known_strides
* bonus_for_one
).to_int ();
3533 /* If there is a reason to penalize the function described by INFO in the
3534 cloning goodness evaluation, do so. */
3537 incorporate_penalties (cgraph_node
*node
, ipa_node_params
*info
,
3540 if (info
->node_within_scc
&& !info
->node_is_self_scc
)
3541 evaluation
= (evaluation
3542 * (100 - opt_for_fn (node
->decl
,
3543 param_ipa_cp_recursion_penalty
))) / 100;
3545 if (info
->node_calling_single_call
)
3546 evaluation
= (evaluation
3547 * (100 - opt_for_fn (node
->decl
,
3548 param_ipa_cp_single_call_penalty
)))
3554 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3555 and SIZE_COST and with the sum of frequencies of incoming edges to the
3556 potential new clone in FREQUENCIES. */
3559 good_cloning_opportunity_p (struct cgraph_node
*node
, sreal time_benefit
,
3560 sreal freq_sum
, profile_count count_sum
,
3563 if (time_benefit
== 0
3564 || !opt_for_fn (node
->decl
, flag_ipa_cp_clone
)
3565 || node
->optimize_for_size_p ())
3568 gcc_assert (size_cost
> 0);
3570 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3571 int eval_threshold
= opt_for_fn (node
->decl
, param_ipa_cp_eval_threshold
);
3572 if (count_sum
.nonzero_p ())
3574 gcc_assert (base_count
.nonzero_p ());
3575 sreal factor
= count_sum
.probability_in (base_count
).to_sreal ();
3576 sreal evaluation
= (time_benefit
* factor
) / size_cost
;
3577 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3580 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3582 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3583 "size: %i, count_sum: ", time_benefit
.to_double (),
3585 count_sum
.dump (dump_file
);
3586 fprintf (dump_file
, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3587 info
->node_within_scc
3588 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3589 info
->node_calling_single_call
? ", single_call" : "",
3590 evaluation
.to_double (), eval_threshold
);
3593 return evaluation
.to_int () >= eval_threshold
;
3597 sreal evaluation
= (time_benefit
* freq_sum
) / size_cost
;
3598 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3601 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3602 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3603 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3605 time_benefit
.to_double (), size_cost
, freq_sum
.to_double (),
3606 info
->node_within_scc
3607 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3608 info
->node_calling_single_call
? ", single_call" : "",
3609 evaluation
.to_double (), eval_threshold
);
3611 return evaluation
.to_int () >= eval_threshold
;
3615 /* Grow vectors in AVALS and fill them with information about values of
3616 parameters that are known to be independent of the context. Only calculate
3617 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3618 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3619 parameters will be stored in it.
3621 TODO: Also grow context independent value range vectors. */
3624 gather_context_independent_values (class ipa_node_params
*info
,
3625 ipa_auto_call_arg_values
*avals
,
3626 bool calculate_aggs
,
3627 int *removable_params_cost
)
3629 int i
, count
= ipa_get_param_count (info
);
3632 avals
->m_known_vals
.safe_grow_cleared (count
, true);
3633 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
3635 if (removable_params_cost
)
3636 *removable_params_cost
= 0;
3638 for (i
= 0; i
< count
; i
++)
3640 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3641 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3643 if (lat
->is_single_const ())
3645 ipcp_value
<tree
> *val
= lat
->values
;
3646 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3647 avals
->m_known_vals
[i
] = val
->value
;
3648 if (removable_params_cost
)
3649 *removable_params_cost
3650 += estimate_move_cost (TREE_TYPE (val
->value
), false);
3653 else if (removable_params_cost
3654 && !ipa_is_param_used (info
, i
))
3655 *removable_params_cost
3656 += ipa_get_param_move_cost (info
, i
);
3658 if (!ipa_is_param_used (info
, i
))
3661 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3662 /* Do not account known context as reason for cloning. We can see
3663 if it permits devirtualization. */
3664 if (ctxlat
->is_single_const ())
3665 avals
->m_known_contexts
[i
] = ctxlat
->values
->value
;
3668 ret
|= push_agg_values_from_plats (plats
, i
, 0, &avals
->m_known_aggs
);
3674 /* Perform time and size measurement of NODE with the context given in AVALS,
3675 calculate the benefit compared to the node without specialization and store
3676 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3677 context-independent or unused removable parameters and EST_MOVE_COST, the
3678 estimated movement of the considered parameter. */
3681 perform_estimation_of_a_value (cgraph_node
*node
,
3682 ipa_auto_call_arg_values
*avals
,
3683 int removable_params_cost
, int est_move_cost
,
3684 ipcp_value_base
*val
)
3687 ipa_call_estimates estimates
;
3689 estimate_ipcp_clone_size_and_time (node
, avals
, &estimates
);
3691 /* Extern inline functions have no cloning local time benefits because they
3692 will be inlined anyway. The only reason to clone them is if it enables
3693 optimization in any of the functions they call. */
3694 if (DECL_EXTERNAL (node
->decl
) && DECL_DECLARED_INLINE_P (node
->decl
))
3697 time_benefit
= (estimates
.nonspecialized_time
- estimates
.time
)
3698 + (devirtualization_time_bonus (node
, avals
)
3699 + hint_time_bonus (node
, estimates
)
3700 + removable_params_cost
+ est_move_cost
);
3702 int size
= estimates
.size
;
3703 gcc_checking_assert (size
>=0);
3704 /* The inliner-heuristics based estimates may think that in certain
3705 contexts some functions do not have any size at all but we want
3706 all specializations to have at least a tiny cost, not least not to
3711 val
->local_time_benefit
= time_benefit
;
3712 val
->local_size_cost
= size
;
3715 /* Get the overall limit oof growth based on parameters extracted from growth.
3716 it does not really make sense to mix functions with different overall growth
3717 limits but it is possible and if it happens, we do not want to select one
3721 get_max_overall_size (cgraph_node
*node
)
3723 long max_new_size
= orig_overall_size
;
3724 long large_unit
= opt_for_fn (node
->decl
, param_ipa_cp_large_unit_insns
);
3725 if (max_new_size
< large_unit
)
3726 max_new_size
= large_unit
;
3727 int unit_growth
= opt_for_fn (node
->decl
, param_ipa_cp_unit_growth
);
3728 max_new_size
+= max_new_size
* unit_growth
/ 100 + 1;
3729 return max_new_size
;
3732 /* Return true if NODE should be cloned just for a parameter removal, possibly
3733 dumping a reason if not. */
3736 clone_for_param_removal_p (cgraph_node
*node
)
3738 if (!node
->can_change_signature
)
3740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3741 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3742 "function cannot change signature.\n");
3745 if (node
->can_be_local_p ())
3747 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3748 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3749 "IPA-SRA can do it potentially better.\n");
3755 /* Iterate over known values of parameters of NODE and estimate the local
3756 effects in terms of time and size they have. */
3759 estimate_local_effects (struct cgraph_node
*node
)
3761 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3762 int count
= ipa_get_param_count (info
);
3764 int removable_params_cost
;
3766 if (!count
|| !ipcp_versionable_function_p (node
))
3769 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3770 fprintf (dump_file
, "\nEstimating effects for %s.\n", node
->dump_name ());
3772 ipa_auto_call_arg_values avals
;
3773 always_const
= gather_context_independent_values (info
, &avals
, true,
3774 &removable_params_cost
);
3775 int devirt_bonus
= devirtualization_time_bonus (node
, &avals
);
3776 if (always_const
|| devirt_bonus
3777 || (removable_params_cost
&& clone_for_param_removal_p (node
)))
3779 struct caller_statistics stats
;
3780 ipa_call_estimates estimates
;
3782 init_caller_stats (&stats
);
3783 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
3785 estimate_ipcp_clone_size_and_time (node
, &avals
, &estimates
);
3786 sreal time
= estimates
.nonspecialized_time
- estimates
.time
;
3787 time
+= devirt_bonus
;
3788 time
+= hint_time_bonus (node
, estimates
);
3789 time
+= removable_params_cost
;
3790 int size
= estimates
.size
- stats
.n_calls
* removable_params_cost
;
3793 fprintf (dump_file
, " - context independent values, size: %i, "
3794 "time_benefit: %f\n", size
, (time
).to_double ());
3796 if (size
<= 0 || node
->local
)
3798 info
->do_clone_for_all_contexts
= true;
3801 fprintf (dump_file
, " Decided to specialize for all "
3802 "known contexts, code not going to grow.\n");
3804 else if (good_cloning_opportunity_p (node
, time
, stats
.freq_sum
,
3805 stats
.count_sum
, size
))
3807 if (size
+ overall_size
<= get_max_overall_size (node
))
3809 info
->do_clone_for_all_contexts
= true;
3810 overall_size
+= size
;
3813 fprintf (dump_file
, " Decided to specialize for all "
3814 "known contexts, growth (to %li) deemed "
3815 "beneficial.\n", overall_size
);
3817 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3818 fprintf (dump_file
, " Not cloning for all contexts because "
3819 "maximum unit size would be reached with %li.\n",
3820 size
+ overall_size
);
3822 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3823 fprintf (dump_file
, " Not cloning for all contexts because "
3824 "!good_cloning_opportunity_p.\n");
3828 for (int i
= 0; i
< count
; i
++)
3830 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3831 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3832 ipcp_value
<tree
> *val
;
3836 || avals
.m_known_vals
[i
])
3839 for (val
= lat
->values
; val
; val
= val
->next
)
3841 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3842 avals
.m_known_vals
[i
] = val
->value
;
3844 int emc
= estimate_move_cost (TREE_TYPE (val
->value
), true);
3845 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3850 fprintf (dump_file
, " - estimates for value ");
3851 print_ipcp_constant_value (dump_file
, val
->value
);
3852 fprintf (dump_file
, " for ");
3853 ipa_dump_param (dump_file
, info
, i
);
3854 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3855 val
->local_time_benefit
.to_double (),
3856 val
->local_size_cost
);
3859 avals
.m_known_vals
[i
] = NULL_TREE
;
3862 for (int i
= 0; i
< count
; i
++)
3864 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3866 if (!plats
->virt_call
)
3869 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3870 ipcp_value
<ipa_polymorphic_call_context
> *val
;
3874 || !avals
.m_known_contexts
[i
].useless_p ())
3877 for (val
= ctxlat
->values
; val
; val
= val
->next
)
3879 avals
.m_known_contexts
[i
] = val
->value
;
3880 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3883 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3885 fprintf (dump_file
, " - estimates for polymorphic context ");
3886 print_ipcp_constant_value (dump_file
, val
->value
);
3887 fprintf (dump_file
, " for ");
3888 ipa_dump_param (dump_file
, info
, i
);
3889 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3890 val
->local_time_benefit
.to_double (),
3891 val
->local_size_cost
);
3894 avals
.m_known_contexts
[i
] = ipa_polymorphic_call_context ();
3897 unsigned all_ctx_len
= avals
.m_known_aggs
.length ();
3898 auto_vec
<ipa_argagg_value
, 32> all_ctx
;
3899 all_ctx
.reserve_exact (all_ctx_len
);
3900 all_ctx
.splice (avals
.m_known_aggs
);
3901 avals
.m_known_aggs
.safe_grow_cleared (all_ctx_len
+ 1);
3904 for (int index
= 0; index
< count
; index
++)
3906 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, index
);
3908 if (plats
->aggs_bottom
|| !plats
->aggs
)
3911 for (ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3913 ipcp_value
<tree
> *val
;
3914 if (aglat
->bottom
|| !aglat
->values
3915 /* If the following is true, the one value is already part of all
3916 context estimations. */
3917 || (!plats
->aggs_contain_variable
3918 && aglat
->is_single_const ()))
3921 unsigned unit_offset
= aglat
->offset
/ BITS_PER_UNIT
;
3922 while (j
< all_ctx_len
3923 && (all_ctx
[j
].index
< index
3924 || (all_ctx
[j
].index
== index
3925 && all_ctx
[j
].unit_offset
< unit_offset
)))
3927 avals
.m_known_aggs
[j
] = all_ctx
[j
];
3931 for (unsigned k
= j
; k
< all_ctx_len
; k
++)
3932 avals
.m_known_aggs
[k
+1] = all_ctx
[k
];
3934 for (val
= aglat
->values
; val
; val
= val
->next
)
3936 avals
.m_known_aggs
[j
].value
= val
->value
;
3937 avals
.m_known_aggs
[j
].unit_offset
= unit_offset
;
3938 avals
.m_known_aggs
[j
].index
= index
;
3939 avals
.m_known_aggs
[j
].by_ref
= plats
->aggs_by_ref
;
3941 perform_estimation_of_a_value (node
, &avals
,
3942 removable_params_cost
, 0, val
);
3944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3946 fprintf (dump_file
, " - estimates for value ");
3947 print_ipcp_constant_value (dump_file
, val
->value
);
3948 fprintf (dump_file
, " for ");
3949 ipa_dump_param (dump_file
, info
, index
);
3950 fprintf (dump_file
, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3951 "]: time_benefit: %g, size: %i\n",
3952 plats
->aggs_by_ref
? "ref " : "",
3954 val
->local_time_benefit
.to_double (),
3955 val
->local_size_cost
);
3963 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3964 topological sort of values. */
3966 template <typename valtype
>
3968 value_topo_info
<valtype
>::add_val (ipcp_value
<valtype
> *cur_val
)
3970 ipcp_value_source
<valtype
> *src
;
3976 cur_val
->dfs
= dfs_counter
;
3977 cur_val
->low_link
= dfs_counter
;
3979 cur_val
->topo_next
= stack
;
3981 cur_val
->on_stack
= true;
3983 for (src
= cur_val
->sources
; src
; src
= src
->next
)
3986 if (src
->val
->dfs
== 0)
3989 if (src
->val
->low_link
< cur_val
->low_link
)
3990 cur_val
->low_link
= src
->val
->low_link
;
3992 else if (src
->val
->on_stack
3993 && src
->val
->dfs
< cur_val
->low_link
)
3994 cur_val
->low_link
= src
->val
->dfs
;
3997 if (cur_val
->dfs
== cur_val
->low_link
)
3999 ipcp_value
<valtype
> *v
, *scc_list
= NULL
;
4004 stack
= v
->topo_next
;
4005 v
->on_stack
= false;
4006 v
->scc_no
= cur_val
->dfs
;
4008 v
->scc_next
= scc_list
;
4011 while (v
!= cur_val
);
4013 cur_val
->topo_next
= values_topo
;
4014 values_topo
= cur_val
;
4018 /* Add all values in lattices associated with NODE to the topological sort if
4019 they are not there yet. */
4022 add_all_node_vals_to_toposort (cgraph_node
*node
, ipa_topo_info
*topo
)
4024 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4025 int i
, count
= ipa_get_param_count (info
);
4027 for (i
= 0; i
< count
; i
++)
4029 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
4030 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
4031 struct ipcp_agg_lattice
*aglat
;
4035 ipcp_value
<tree
> *val
;
4036 for (val
= lat
->values
; val
; val
= val
->next
)
4037 topo
->constants
.add_val (val
);
4040 if (!plats
->aggs_bottom
)
4041 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4044 ipcp_value
<tree
> *val
;
4045 for (val
= aglat
->values
; val
; val
= val
->next
)
4046 topo
->constants
.add_val (val
);
4049 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
4050 if (!ctxlat
->bottom
)
4052 ipcp_value
<ipa_polymorphic_call_context
> *ctxval
;
4053 for (ctxval
= ctxlat
->values
; ctxval
; ctxval
= ctxval
->next
)
4054 topo
->contexts
.add_val (ctxval
);
4059 /* One pass of constants propagation along the call graph edges, from callers
4060 to callees (requires topological ordering in TOPO), iterate over strongly
4061 connected components. */
4064 propagate_constants_topo (class ipa_topo_info
*topo
)
4068 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
4071 struct cgraph_node
*v
, *node
= topo
->order
[i
];
4072 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (node
);
4074 /* First, iteratively propagate within the strongly connected component
4075 until all lattices stabilize. */
4076 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
4077 if (v
->has_gimple_body_p ())
4079 if (opt_for_fn (v
->decl
, flag_ipa_cp
)
4080 && opt_for_fn (v
->decl
, optimize
))
4081 push_node_to_stack (topo
, v
);
4082 /* When V is not optimized, we can not push it to stack, but
4083 still we need to set all its callees lattices to bottom. */
4086 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4087 propagate_constants_across_call (cs
);
4091 v
= pop_node_from_stack (topo
);
4094 struct cgraph_edge
*cs
;
4095 class ipa_node_params
*info
= NULL
;
4096 bool self_scc
= true;
4098 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4099 if (ipa_edge_within_scc (cs
))
4101 cgraph_node
*callee
= cs
->callee
->function_symbol ();
4108 info
= ipa_node_params_sum
->get (v
);
4109 info
->node_within_scc
= true;
4112 if (propagate_constants_across_call (cs
))
4113 push_node_to_stack (topo
, callee
);
4117 info
->node_is_self_scc
= self_scc
;
4119 v
= pop_node_from_stack (topo
);
4122 /* Afterwards, propagate along edges leading out of the SCC, calculates
4123 the local effects of the discovered constants and all valid values to
4124 their topological sort. */
4125 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
4126 if (v
->has_gimple_body_p ()
4127 && opt_for_fn (v
->decl
, flag_ipa_cp
)
4128 && opt_for_fn (v
->decl
, optimize
))
4130 struct cgraph_edge
*cs
;
4132 estimate_local_effects (v
);
4133 add_all_node_vals_to_toposort (v
, topo
);
4134 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
4135 if (!ipa_edge_within_scc (cs
))
4136 propagate_constants_across_call (cs
);
4138 cycle_nodes
.release ();
4142 /* Propagate the estimated effects of individual values along the topological
4143 from the dependent values to those they depend on. */
4145 template <typename valtype
>
4147 value_topo_info
<valtype
>::propagate_effects ()
4149 ipcp_value
<valtype
> *base
;
4150 hash_set
<ipcp_value
<valtype
> *> processed_srcvals
;
4152 for (base
= values_topo
; base
; base
= base
->topo_next
)
4154 ipcp_value_source
<valtype
> *src
;
4155 ipcp_value
<valtype
> *val
;
4157 HOST_WIDE_INT size
= 0;
4159 for (val
= base
; val
; val
= val
->scc_next
)
4161 time
= time
+ val
->local_time_benefit
+ val
->prop_time_benefit
;
4162 size
= size
+ val
->local_size_cost
+ val
->prop_size_cost
;
4165 for (val
= base
; val
; val
= val
->scc_next
)
4167 processed_srcvals
.empty ();
4168 for (src
= val
->sources
; src
; src
= src
->next
)
4170 && src
->cs
->maybe_hot_p ())
4172 if (!processed_srcvals
.add (src
->val
))
4174 HOST_WIDE_INT prop_size
= size
+ src
->val
->prop_size_cost
;
4175 if (prop_size
< INT_MAX
)
4176 src
->val
->prop_size_cost
= prop_size
;
4181 int special_factor
= 1;
4182 if (val
->same_scc (src
->val
))
4184 = opt_for_fn(src
->cs
->caller
->decl
,
4185 param_ipa_cp_recursive_freq_factor
);
4186 else if (val
->self_recursion_generated_p ()
4187 && (src
->cs
->callee
->function_symbol ()
4188 == src
->cs
->caller
))
4190 int max_recur_gen_depth
4191 = opt_for_fn(src
->cs
->caller
->decl
,
4192 param_ipa_cp_max_recursive_depth
);
4193 special_factor
= max_recur_gen_depth
4194 - val
->self_recursion_generated_level
+ 1;
4197 src
->val
->prop_time_benefit
4198 += time
* special_factor
* src
->cs
->sreal_frequency ();
4203 val
->prop_time_benefit
= time
;
4204 val
->prop_size_cost
= size
;
4208 val
->prop_time_benefit
= 0;
4209 val
->prop_size_cost
= 0;
4215 /* Callback for qsort to sort counts of all edges. */
4218 compare_edge_profile_counts (const void *a
, const void *b
)
4220 const profile_count
*cnt1
= (const profile_count
*) a
;
4221 const profile_count
*cnt2
= (const profile_count
*) b
;
4231 /* Propagate constants, polymorphic contexts and their effects from the
4232 summaries interprocedurally. */
4235 ipcp_propagate_stage (class ipa_topo_info
*topo
)
4237 struct cgraph_node
*node
;
4240 fprintf (dump_file
, "\n Propagating constants:\n\n");
4242 base_count
= profile_count::uninitialized ();
4244 bool compute_count_base
= false;
4245 unsigned base_count_pos_percent
= 0;
4246 FOR_EACH_DEFINED_FUNCTION (node
)
4248 if (node
->has_gimple_body_p ()
4249 && opt_for_fn (node
->decl
, flag_ipa_cp
)
4250 && opt_for_fn (node
->decl
, optimize
))
4252 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4253 determine_versionability (node
, info
);
4255 unsigned nlattices
= ipa_get_param_count (info
);
4256 void *chunk
= XCNEWVEC (class ipcp_param_lattices
, nlattices
);
4257 info
->lattices
= new (chunk
) ipcp_param_lattices
[nlattices
];
4258 initialize_node_lattices (node
);
4260 ipa_size_summary
*s
= ipa_size_summaries
->get (node
);
4261 if (node
->definition
&& !node
->alias
&& s
!= NULL
)
4262 overall_size
+= s
->self_size
;
4263 if (node
->count
.ipa ().initialized_p ())
4265 compute_count_base
= true;
4266 unsigned pos_percent
= opt_for_fn (node
->decl
,
4267 param_ipa_cp_profile_count_base
);
4268 base_count_pos_percent
= MAX (base_count_pos_percent
, pos_percent
);
4272 if (compute_count_base
)
4274 auto_vec
<profile_count
> all_edge_counts
;
4275 all_edge_counts
.reserve_exact (symtab
->edges_count
);
4276 FOR_EACH_DEFINED_FUNCTION (node
)
4277 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4279 profile_count count
= cs
->count
.ipa ();
4280 if (!count
.nonzero_p ())
4283 enum availability avail
;
4285 = cs
->callee
->function_or_virtual_thunk_symbol (&avail
);
4286 ipa_node_params
*info
= ipa_node_params_sum
->get (tgt
);
4287 if (info
&& info
->versionable
)
4288 all_edge_counts
.quick_push (count
);
4291 if (!all_edge_counts
.is_empty ())
4293 gcc_assert (base_count_pos_percent
<= 100);
4294 all_edge_counts
.qsort (compare_edge_profile_counts
);
4296 unsigned base_count_pos
4297 = ((all_edge_counts
.length () * (base_count_pos_percent
)) / 100);
4298 base_count
= all_edge_counts
[base_count_pos
];
4302 fprintf (dump_file
, "\nSelected base_count from %u edges at "
4303 "position %u, arriving at: ", all_edge_counts
.length (),
4305 base_count
.dump (dump_file
);
4306 fprintf (dump_file
, "\n");
4310 fprintf (dump_file
, "\nNo candidates with non-zero call count found, "
4311 "continuing as if without profile feedback.\n");
4314 orig_overall_size
= overall_size
;
4317 fprintf (dump_file
, "\noverall_size: %li\n", overall_size
);
4319 propagate_constants_topo (topo
);
4321 ipcp_verify_propagated_values ();
4322 topo
->constants
.propagate_effects ();
4323 topo
->contexts
.propagate_effects ();
4327 fprintf (dump_file
, "\nIPA lattices after all propagation:\n");
4328 print_all_lattices (dump_file
, (dump_flags
& TDF_DETAILS
), true);
4332 /* Discover newly direct outgoing edges from NODE which is a new clone with
4333 known KNOWN_CSTS and make them direct. */
4336 ipcp_discover_new_direct_edges (struct cgraph_node
*node
,
4337 vec
<tree
> known_csts
,
4338 vec
<ipa_polymorphic_call_context
>
4340 vec
<ipa_argagg_value
, va_gc
> *aggvals
)
4342 struct cgraph_edge
*ie
, *next_ie
;
4345 for (ie
= node
->indirect_calls
; ie
; ie
= next_ie
)
4350 next_ie
= ie
->next_callee
;
4351 ipa_argagg_value_list
avs (aggvals
);
4352 target
= ipa_get_indirect_edge_target_1 (ie
, known_csts
, known_contexts
,
4356 bool agg_contents
= ie
->indirect_info
->agg_contents
;
4357 bool polymorphic
= ie
->indirect_info
->polymorphic
;
4358 int param_index
= ie
->indirect_info
->param_index
;
4359 struct cgraph_edge
*cs
= ipa_make_edge_direct_to_target (ie
, target
,
4363 if (cs
&& !agg_contents
&& !polymorphic
)
4365 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4366 int c
= ipa_get_controlled_uses (info
, param_index
);
4367 if (c
!= IPA_UNDESCRIBED_USE
4368 && !ipa_get_param_load_dereferenced (info
, param_index
))
4370 struct ipa_ref
*to_del
;
4373 ipa_set_controlled_uses (info
, param_index
, c
);
4374 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4375 fprintf (dump_file
, " controlled uses count of param "
4376 "%i bumped down to %i\n", param_index
, c
);
4378 && (to_del
= node
->find_reference (cs
->callee
, NULL
, 0,
4381 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4382 fprintf (dump_file
, " and even removing its "
4383 "cloning-created reference\n");
4384 to_del
->remove_reference ();
4390 /* Turning calls to direct calls will improve overall summary. */
4392 ipa_update_overall_fn_summary (node
);
4395 class edge_clone_summary
;
4396 static call_summary
<edge_clone_summary
*> *edge_clone_summaries
= NULL
;
4398 /* Edge clone summary. */
4400 class edge_clone_summary
4403 /* Default constructor. */
4404 edge_clone_summary (): prev_clone (NULL
), next_clone (NULL
) {}
4406 /* Default destructor. */
4407 ~edge_clone_summary ()
4410 edge_clone_summaries
->get (prev_clone
)->next_clone
= next_clone
;
4412 edge_clone_summaries
->get (next_clone
)->prev_clone
= prev_clone
;
4415 cgraph_edge
*prev_clone
;
4416 cgraph_edge
*next_clone
;
4419 class edge_clone_summary_t
:
4420 public call_summary
<edge_clone_summary
*>
4423 edge_clone_summary_t (symbol_table
*symtab
):
4424 call_summary
<edge_clone_summary
*> (symtab
)
4426 m_initialize_when_cloning
= true;
4429 void duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4430 edge_clone_summary
*src_data
,
4431 edge_clone_summary
*dst_data
) final override
;
4434 /* Edge duplication hook. */
4437 edge_clone_summary_t::duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4438 edge_clone_summary
*src_data
,
4439 edge_clone_summary
*dst_data
)
4441 if (src_data
->next_clone
)
4442 edge_clone_summaries
->get (src_data
->next_clone
)->prev_clone
= dst_edge
;
4443 dst_data
->prev_clone
= src_edge
;
4444 dst_data
->next_clone
= src_data
->next_clone
;
4445 src_data
->next_clone
= dst_edge
;
4448 /* Return true is CS calls DEST or its clone for all contexts. When
4449 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4450 edges from/to an all-context clone. */
4453 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge
*cs
, cgraph_node
*dest
,
4454 bool allow_recursion_to_clone
)
4456 enum availability availability
;
4457 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
4459 if (availability
<= AVAIL_INTERPOSABLE
)
4463 if (!allow_recursion_to_clone
&& cs
->caller
== callee
)
4466 ipa_node_params
*info
= ipa_node_params_sum
->get (callee
);
4467 return info
->is_all_contexts_clone
&& info
->ipcp_orig_node
== dest
;
4470 /* Return true if edge CS does bring about the value described by SRC to
4471 DEST_VAL of node DEST or its clone for all contexts. */
4474 cgraph_edge_brings_value_p (cgraph_edge
*cs
, ipcp_value_source
<tree
> *src
,
4475 cgraph_node
*dest
, ipcp_value
<tree
> *dest_val
)
4477 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4479 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, !src
->val
)
4480 || caller_info
->node_dead
)
4486 if (caller_info
->ipcp_orig_node
)
4489 if (src
->offset
== -1)
4490 t
= caller_info
->known_csts
[src
->index
];
4491 else if (ipcp_transformation
*ts
4492 = ipcp_get_transformation_summary (cs
->caller
))
4494 ipa_argagg_value_list
avl (ts
);
4495 t
= avl
.get_value (src
->index
, src
->offset
/ BITS_PER_UNIT
);
4497 return (t
!= NULL_TREE
4498 && values_equal_for_ipcp_p (src
->val
->value
, t
));
4502 if (src
->val
== dest_val
)
4505 struct ipcp_agg_lattice
*aglat
;
4506 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4508 if (src
->offset
== -1)
4509 return (plats
->itself
.is_single_const ()
4510 && values_equal_for_ipcp_p (src
->val
->value
,
4511 plats
->itself
.values
->value
));
4514 if (plats
->aggs_bottom
|| plats
->aggs_contain_variable
)
4516 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4517 if (aglat
->offset
== src
->offset
)
4518 return (aglat
->is_single_const ()
4519 && values_equal_for_ipcp_p (src
->val
->value
,
4520 aglat
->values
->value
));
4526 /* Return true if edge CS does bring about the value described by SRC to
4527 DST_VAL of node DEST or its clone for all contexts. */
4530 cgraph_edge_brings_value_p (cgraph_edge
*cs
,
4531 ipcp_value_source
<ipa_polymorphic_call_context
> *src
,
4533 ipcp_value
<ipa_polymorphic_call_context
> *)
4535 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4537 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, true)
4538 || caller_info
->node_dead
)
4543 if (caller_info
->ipcp_orig_node
)
4544 return (caller_info
->known_contexts
.length () > (unsigned) src
->index
)
4545 && values_equal_for_ipcp_p (src
->val
->value
,
4546 caller_info
->known_contexts
[src
->index
]);
4548 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4550 return plats
->ctxlat
.is_single_const ()
4551 && values_equal_for_ipcp_p (src
->val
->value
,
4552 plats
->ctxlat
.values
->value
);
4555 /* Get the next clone in the linked list of clones of an edge. */
4557 static inline struct cgraph_edge
*
4558 get_next_cgraph_edge_clone (struct cgraph_edge
*cs
)
4560 edge_clone_summary
*s
= edge_clone_summaries
->get (cs
);
4561 return s
!= NULL
? s
->next_clone
: NULL
;
4564 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4565 of them is viable and hot, return true. In that case, for those that still
4566 hold, add their edge frequency and their number and cumulative profile
4567 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4568 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4570 template <typename valtype
>
4572 get_info_about_necessary_edges (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4573 sreal
*freq_sum
, int *caller_count
,
4574 profile_count
*rec_count_sum
,
4575 profile_count
*nonrec_count_sum
)
4577 ipcp_value_source
<valtype
> *src
;
4580 profile_count rec_cnt
= profile_count::zero ();
4581 profile_count nonrec_cnt
= profile_count::zero ();
4583 bool non_self_recursive
= false;
4585 for (src
= val
->sources
; src
; src
= src
->next
)
4587 struct cgraph_edge
*cs
= src
->cs
;
4590 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4593 freq
+= cs
->sreal_frequency ();
4594 hot
|= cs
->maybe_hot_p ();
4595 if (cs
->caller
!= dest
)
4597 non_self_recursive
= true;
4598 if (cs
->count
.ipa ().initialized_p ())
4599 rec_cnt
+= cs
->count
.ipa ();
4601 else if (cs
->count
.ipa ().initialized_p ())
4602 nonrec_cnt
+= cs
->count
.ipa ();
4604 cs
= get_next_cgraph_edge_clone (cs
);
4608 /* If the only edges bringing a value are self-recursive ones, do not bother
4610 if (!non_self_recursive
)
4614 *caller_count
= count
;
4615 *rec_count_sum
= rec_cnt
;
4616 *nonrec_count_sum
= nonrec_cnt
;
4618 if (!hot
&& ipa_node_params_sum
->get (dest
)->node_within_scc
)
4620 struct cgraph_edge
*cs
;
4622 /* Cold non-SCC source edge could trigger hot recursive execution of
4623 function. Consider the case as hot and rely on following cost model
4624 computation to further select right one. */
4625 for (cs
= dest
->callers
; cs
; cs
= cs
->next_caller
)
4626 if (cs
->caller
== dest
&& cs
->maybe_hot_p ())
4633 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4634 to let a non-self-recursive caller be the first element. Thus, we can
4635 simplify intersecting operations on values that arrive from all of these
4636 callers, especially when there exists self-recursive call. Return true if
4637 this kind of adjustment is possible. */
4640 adjust_callers_for_value_intersection (vec
<cgraph_edge
*> &callers
,
4643 for (unsigned i
= 0; i
< callers
.length (); i
++)
4645 cgraph_edge
*cs
= callers
[i
];
4647 if (cs
->caller
!= node
)
4651 callers
[i
] = callers
[0];
4660 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4661 is assumed their number is known and equal to CALLER_COUNT. */
4663 template <typename valtype
>
4664 static vec
<cgraph_edge
*>
4665 gather_edges_for_value (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4668 ipcp_value_source
<valtype
> *src
;
4669 vec
<cgraph_edge
*> ret
;
4671 ret
.create (caller_count
);
4672 for (src
= val
->sources
; src
; src
= src
->next
)
4674 struct cgraph_edge
*cs
= src
->cs
;
4677 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4678 ret
.quick_push (cs
);
4679 cs
= get_next_cgraph_edge_clone (cs
);
4683 if (caller_count
> 1)
4684 adjust_callers_for_value_intersection (ret
, dest
);
4689 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4690 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4691 should be set to true when the reference created for the constant should be
4692 a load one and not an address one because the corresponding parameter p is
4695 static struct ipa_replace_map
*
4696 get_replacement_map (class ipa_node_params
*info
, tree value
, int parm_num
,
4697 bool force_load_ref
)
4699 struct ipa_replace_map
*replace_map
;
4701 replace_map
= ggc_alloc
<ipa_replace_map
> ();
4704 fprintf (dump_file
, " replacing ");
4705 ipa_dump_param (dump_file
, info
, parm_num
);
4707 fprintf (dump_file
, " with const ");
4708 print_generic_expr (dump_file
, value
);
4711 fprintf (dump_file
, " - forcing load reference\n");
4713 fprintf (dump_file
, "\n");
4715 replace_map
->parm_num
= parm_num
;
4716 replace_map
->new_tree
= value
;
4717 replace_map
->force_load_ref
= force_load_ref
;
4721 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4722 one, otherwise it will be referred to as the original node. */
4725 dump_profile_updates (cgraph_node
*node
, bool spec
)
4728 fprintf (dump_file
, " setting count of the specialized node %s to ",
4729 node
->dump_name ());
4731 fprintf (dump_file
, " setting count of the original node %s to ",
4732 node
->dump_name ());
4734 node
->count
.dump (dump_file
);
4735 fprintf (dump_file
, "\n");
4736 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4738 fprintf (dump_file
, " edge to %s has count ",
4739 cs
->callee
->dump_name ());
4740 cs
->count
.dump (dump_file
);
4741 fprintf (dump_file
, "\n");
4745 /* With partial train run we do not want to assume that original's count is
4746 zero whenever we redurect all executed edges to clone. Simply drop profile
4747 to local one in this case. In eany case, return the new value. ORIG_NODE
4748 is the original node and its count has not been updaed yet. */
4751 lenient_count_portion_handling (profile_count remainder
, cgraph_node
*orig_node
)
4753 if (remainder
.ipa_p () && !remainder
.ipa ().nonzero_p ()
4754 && orig_node
->count
.ipa_p () && orig_node
->count
.ipa ().nonzero_p ()
4755 && opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4756 remainder
= remainder
.guessed_local ();
4761 /* Structure to sum counts coming from nodes other than the original node and
4764 struct gather_other_count_struct
4767 profile_count other_count
;
4770 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4771 counts that come from non-self-recursive calls.. */
4774 gather_count_of_non_rec_edges (cgraph_node
*node
, void *data
)
4776 gather_other_count_struct
*desc
= (gather_other_count_struct
*) data
;
4777 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4778 if (cs
->caller
!= desc
->orig
&& cs
->caller
->clone_of
!= desc
->orig
)
4779 desc
->other_count
+= cs
->count
.ipa ();
4783 /* Structure to help analyze if we need to boost counts of some clones of some
4784 non-recursive edges to match the new callee count. */
4786 struct desc_incoming_count_struct
4789 hash_set
<cgraph_edge
*> *processed_edges
;
4790 profile_count count
;
4791 unsigned unproc_orig_rec_edges
;
4794 /* Go over edges calling NODE and its thunks and gather information about
4795 incoming counts so that we know if we need to make any adjustments. */
4798 analyze_clone_icoming_counts (cgraph_node
*node
,
4799 desc_incoming_count_struct
*desc
)
4801 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4802 if (cs
->caller
->thunk
)
4804 analyze_clone_icoming_counts (cs
->caller
, desc
);
4809 if (cs
->count
.initialized_p ())
4810 desc
->count
+= cs
->count
.ipa ();
4811 if (!desc
->processed_edges
->contains (cs
)
4812 && cs
->caller
->clone_of
== desc
->orig
)
4813 desc
->unproc_orig_rec_edges
++;
4817 /* If caller edge counts of a clone created for a self-recursive arithmetic
4818 jump function must be adjusted because it is coming from a the "seed" clone
4819 for the first value and so has been excessively scaled back as if it was not
4820 a recursive call, adjust it so that the incoming counts of NODE match its
4821 count. NODE is the node or its thunk. */
4824 adjust_clone_incoming_counts (cgraph_node
*node
,
4825 desc_incoming_count_struct
*desc
)
4827 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4828 if (cs
->caller
->thunk
)
4830 adjust_clone_incoming_counts (cs
->caller
, desc
);
4831 profile_count sum
= profile_count::zero ();
4832 for (cgraph_edge
*e
= cs
->caller
->callers
; e
; e
= e
->next_caller
)
4833 if (e
->count
.initialized_p ())
4834 sum
+= e
->count
.ipa ();
4835 cs
->count
= cs
->count
.combine_with_ipa_count (sum
);
4837 else if (!desc
->processed_edges
->contains (cs
)
4838 && cs
->caller
->clone_of
== desc
->orig
)
4840 cs
->count
+= desc
->count
;
4843 fprintf (dump_file
, " Adjusted count of an incoming edge of "
4844 "a clone %s -> %s to ", cs
->caller
->dump_name (),
4845 cs
->callee
->dump_name ());
4846 cs
->count
.dump (dump_file
);
4847 fprintf (dump_file
, "\n");
4852 /* When ORIG_NODE has been cloned for values which have been generated fora
4853 self-recursive call as a result of an arithmetic pass-through
4854 jump-functions, adjust its count together with counts of all such clones in
4855 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4857 The function sums the counts of the original node and all its clones that
4858 cannot be attributed to a specific clone because it comes from a
4859 non-recursive edge. This sum is then evenly divided between the clones and
4860 on top of that each one gets all the counts which can be attributed directly
4864 update_counts_for_self_gen_clones (cgraph_node
*orig_node
,
4865 const vec
<cgraph_node
*> &self_gen_clones
)
4867 profile_count redist_sum
= orig_node
->count
.ipa ();
4868 if (!(redist_sum
> profile_count::zero ()))
4872 fprintf (dump_file
, " Updating profile of self recursive clone "
4875 gather_other_count_struct gocs
;
4876 gocs
.orig
= orig_node
;
4877 gocs
.other_count
= profile_count::zero ();
4879 auto_vec
<profile_count
, 8> other_edges_count
;
4880 for (cgraph_node
*n
: self_gen_clones
)
4882 gocs
.other_count
= profile_count::zero ();
4883 n
->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges
,
4885 other_edges_count
.safe_push (gocs
.other_count
);
4886 redist_sum
-= gocs
.other_count
;
4889 hash_set
<cgraph_edge
*> processed_edges
;
4891 for (cgraph_node
*n
: self_gen_clones
)
4893 profile_count orig_count
= n
->count
;
4894 profile_count new_count
4895 = (redist_sum
/ self_gen_clones
.length () + other_edges_count
[i
]);
4896 new_count
= lenient_count_portion_handling (new_count
, orig_node
);
4897 n
->count
= new_count
;
4898 profile_count::adjust_for_ipa_scaling (&new_count
, &orig_count
);
4899 for (cgraph_edge
*cs
= n
->callees
; cs
; cs
= cs
->next_callee
)
4901 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4902 processed_edges
.add (cs
);
4904 for (cgraph_edge
*cs
= n
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4905 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4910 /* There are still going to be edges to ORIG_NODE that have one or more
4911 clones coming from another node clone in SELF_GEN_CLONES and which we
4912 scaled by the same amount, which means that the total incoming sum of
4913 counts to ORIG_NODE will be too high, scale such edges back. */
4914 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4916 if (cs
->callee
->ultimate_alias_target () == orig_node
)
4919 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4920 if (e
->callee
->ultimate_alias_target () == orig_node
4921 && processed_edges
.contains (e
))
4924 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4925 if (e
->callee
->ultimate_alias_target () == orig_node
4926 && processed_edges
.contains (e
))
4931 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4932 along self-recursive edges are likely to have fairly low count and so
4933 edges from them to nodes in the self_gen_clones do not correspond to the
4934 artificially distributed count of the nodes, the total sum of incoming
4935 edges to some clones might be too low. Detect this situation and correct
4937 for (cgraph_node
*n
: self_gen_clones
)
4939 if (!(n
->count
.ipa () > profile_count::zero ()))
4942 desc_incoming_count_struct desc
;
4943 desc
.orig
= orig_node
;
4944 desc
.processed_edges
= &processed_edges
;
4945 desc
.count
= profile_count::zero ();
4946 desc
.unproc_orig_rec_edges
= 0;
4947 analyze_clone_icoming_counts (n
, &desc
);
4949 if (n
->count
.differs_from_p (desc
.count
))
4951 if (n
->count
> desc
.count
4952 && desc
.unproc_orig_rec_edges
> 0)
4954 desc
.count
= n
->count
- desc
.count
;
4955 desc
.count
= desc
.count
/= desc
.unproc_orig_rec_edges
;
4956 adjust_clone_incoming_counts (n
, &desc
);
4960 " Unable to fix up incoming counts for %s.\n",
4966 for (cgraph_node
*n
: self_gen_clones
)
4967 dump_profile_updates (n
, n
!= orig_node
);
4971 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4972 their profile information to reflect this. This function should not be used
4973 for clones generated for arithmetic pass-through jump functions on a
4974 self-recursive call graph edge, that situation is handled by
4975 update_counts_for_self_gen_clones. */
4978 update_profiling_info (struct cgraph_node
*orig_node
,
4979 struct cgraph_node
*new_node
)
4981 struct caller_statistics stats
;
4982 profile_count new_sum
;
4983 profile_count remainder
, orig_node_count
= orig_node
->count
.ipa ();
4985 if (!(orig_node_count
> profile_count::zero ()))
4990 fprintf (dump_file
, " Updating profile from original count: ");
4991 orig_node_count
.dump (dump_file
);
4992 fprintf (dump_file
, "\n");
4995 init_caller_stats (&stats
, new_node
);
4996 new_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
4998 new_sum
= stats
.count_sum
;
5000 bool orig_edges_processed
= false;
5001 if (new_sum
> orig_node_count
)
5003 /* TODO: Profile has alreay gone astray, keep what we have but lower it
5004 to global0 category. */
5005 remainder
= orig_node
->count
.global0 ();
5007 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
5008 cs
->count
= cs
->count
.global0 ();
5009 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
5011 cs
= cs
->next_callee
)
5012 cs
->count
= cs
->count
.global0 ();
5013 orig_edges_processed
= true;
5015 else if (stats
.rec_count_sum
.nonzero_p ())
5017 int new_nonrec_calls
= stats
.n_nonrec_calls
;
5018 /* There are self-recursive edges which are likely to bring in the
5019 majority of calls but which we must divide in between the original and
5021 init_caller_stats (&stats
, orig_node
);
5022 orig_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
,
5024 int orig_nonrec_calls
= stats
.n_nonrec_calls
;
5025 profile_count orig_nonrec_call_count
= stats
.count_sum
;
5027 if (orig_node
->local
)
5029 if (!orig_nonrec_call_count
.nonzero_p ())
5032 fprintf (dump_file
, " The original is local and the only "
5033 "incoming edges from non-dead callers with nonzero "
5034 "counts are self-recursive, assuming it is cold.\n");
5035 /* The NEW_NODE count and counts of all its outgoing edges
5036 are still unmodified copies of ORIG_NODE's. Just clear
5037 the latter and bail out. */
5039 if (opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
5040 zero
= profile_count::zero ().guessed_local ();
5042 zero
= profile_count::adjusted_zero ();
5043 orig_node
->count
= zero
;
5044 for (cgraph_edge
*cs
= orig_node
->callees
;
5046 cs
= cs
->next_callee
)
5048 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
5050 cs
= cs
->next_callee
)
5057 /* Let's behave as if there was another caller that accounts for all
5058 the calls that were either indirect or from other compilation
5060 orig_nonrec_calls
++;
5061 profile_count pretend_caller_count
5062 = (orig_node_count
- new_sum
- orig_nonrec_call_count
5063 - stats
.rec_count_sum
);
5064 orig_nonrec_call_count
+= pretend_caller_count
;
5067 /* Divide all "unexplained" counts roughly proportionally to sums of
5068 counts of non-recursive calls.
5070 We put rather arbitrary limits on how many counts we claim because the
5071 number of non-self-recursive incoming count is only a rough guideline
5072 and there are cases (such as mcf) where using it blindly just takes
5073 too many. And if lattices are considered in the opposite order we
5074 could also take too few. */
5075 profile_count unexp
= orig_node_count
- new_sum
- orig_nonrec_call_count
;
5077 int limit_den
= 2 * (orig_nonrec_calls
+ new_nonrec_calls
);
5078 profile_count new_part
5079 = MAX(MIN (unexp
.apply_scale (new_sum
,
5080 new_sum
+ orig_nonrec_call_count
),
5081 unexp
.apply_scale (limit_den
- 1, limit_den
)),
5082 unexp
.apply_scale (new_nonrec_calls
, limit_den
));
5085 fprintf (dump_file
, " Claiming ");
5086 new_part
.dump (dump_file
);
5087 fprintf (dump_file
, " of unexplained ");
5088 unexp
.dump (dump_file
);
5089 fprintf (dump_file
, " counts because of self-recursive "
5092 new_sum
+= new_part
;
5093 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
5097 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
5100 new_sum
= orig_node_count
.combine_with_ipa_count (new_sum
);
5101 new_node
->count
= new_sum
;
5102 orig_node
->count
= remainder
;
5104 profile_count orig_new_node_count
= orig_node_count
;
5105 profile_count::adjust_for_ipa_scaling (&new_sum
, &orig_new_node_count
);
5106 for (cgraph_edge
*cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
5107 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
5108 for (cgraph_edge
*cs
= new_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
5109 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
5111 if (!orig_edges_processed
)
5113 profile_count::adjust_for_ipa_scaling (&remainder
, &orig_node_count
);
5114 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
5115 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
5116 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
5118 cs
= cs
->next_callee
)
5119 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
5124 dump_profile_updates (new_node
, true);
5125 dump_profile_updates (orig_node
, false);
5129 /* Update the respective profile of specialized NEW_NODE and the original
5130 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5131 have been redirected to the specialized version. */
5134 update_specialized_profile (struct cgraph_node
*new_node
,
5135 struct cgraph_node
*orig_node
,
5136 profile_count redirected_sum
)
5138 struct cgraph_edge
*cs
;
5139 profile_count new_node_count
, orig_node_count
= orig_node
->count
.ipa ();
5143 fprintf (dump_file
, " the sum of counts of redirected edges is ");
5144 redirected_sum
.dump (dump_file
);
5145 fprintf (dump_file
, "\n old ipa count of the original node is ");
5146 orig_node_count
.dump (dump_file
);
5147 fprintf (dump_file
, "\n");
5149 if (!(orig_node_count
> profile_count::zero ()))
5152 new_node_count
= new_node
->count
;
5153 new_node
->count
+= redirected_sum
;
5155 = lenient_count_portion_handling (orig_node
->count
- redirected_sum
,
5158 for (cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
5159 cs
->count
+= cs
->count
.apply_scale (redirected_sum
, new_node_count
);
5161 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
5163 profile_count dec
= cs
->count
.apply_scale (redirected_sum
,
5170 dump_profile_updates (new_node
, true);
5171 dump_profile_updates (orig_node
, false);
5175 static void adjust_references_in_caller (cgraph_edge
*cs
,
5176 symtab_node
*symbol
, int index
);
5178 /* Simple structure to pass a symbol and index (with same meaning as parameters
5179 of adjust_references_in_caller) through a void* parameter of a
5180 call_for_symbol_thunks_and_aliases callback. */
5181 struct symbol_and_index_together
5183 symtab_node
*symbol
;
5187 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5188 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5190 adjust_refs_in_act_callers (struct cgraph_node
*node
, void *data
)
5192 symbol_and_index_together
*pack
= (symbol_and_index_together
*) data
;
5193 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
5194 if (!cs
->caller
->thunk
)
5195 adjust_references_in_caller (cs
, pack
->symbol
, pack
->index
);
5199 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5200 variable which is only dereferenced and which is represented by SYMBOL. See
5201 if we can remove ADDR reference in callers assosiated witht the call. */
5204 adjust_references_in_caller (cgraph_edge
*cs
, symtab_node
*symbol
, int index
)
5206 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5207 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, index
);
5208 if (jfunc
->type
== IPA_JF_CONST
)
5210 ipa_ref
*to_del
= cs
->caller
->find_reference (symbol
, cs
->call_stmt
,
5215 to_del
->remove_reference ();
5216 ipa_zap_jf_refdesc (jfunc
);
5218 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5219 cs
->caller
->dump_name (), symbol
->dump_name ());
5223 if (jfunc
->type
!= IPA_JF_PASS_THROUGH
5224 || ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
5225 || ipa_get_jf_pass_through_refdesc_decremented (jfunc
))
5228 int fidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5229 cgraph_node
*caller
= cs
->caller
;
5230 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (caller
);
5231 /* TODO: This consistency check may be too big and not really
5232 that useful. Consider removing it. */
5234 if (caller_info
->ipcp_orig_node
)
5235 cst
= caller_info
->known_csts
[fidx
];
5238 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (caller_info
, fidx
);
5239 gcc_assert (lat
->is_single_const ());
5240 cst
= lat
->values
->value
;
5242 gcc_assert (TREE_CODE (cst
) == ADDR_EXPR
5243 && (symtab_node::get (get_base_address (TREE_OPERAND (cst
, 0)))
5246 int cuses
= ipa_get_controlled_uses (caller_info
, fidx
);
5247 if (cuses
== IPA_UNDESCRIBED_USE
)
5249 gcc_assert (cuses
> 0);
5251 ipa_set_controlled_uses (caller_info
, fidx
, cuses
);
5252 ipa_set_jf_pass_through_refdesc_decremented (jfunc
, true);
5253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5254 fprintf (dump_file
, " Controlled uses of parameter %i of %s dropped "
5255 "to %i.\n", fidx
, caller
->dump_name (), cuses
);
5259 if (caller_info
->ipcp_orig_node
)
5261 /* Cloning machinery has created a reference here, we need to either
5262 remove it or change it to a read one. */
5263 ipa_ref
*to_del
= caller
->find_reference (symbol
, NULL
, 0, IPA_REF_ADDR
);
5266 to_del
->remove_reference ();
5268 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5269 cs
->caller
->dump_name (), symbol
->dump_name ());
5270 if (ipa_get_param_load_dereferenced (caller_info
, fidx
))
5272 caller
->create_reference (symbol
, IPA_REF_LOAD
, NULL
);
5275 " ...and replaced it with LOAD one.\n");
5280 symbol_and_index_together pack
;
5281 pack
.symbol
= symbol
;
5283 if (caller
->can_change_signature
)
5284 caller
->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers
,
5289 /* Return true if we would like to remove a parameter from NODE when cloning it
5290 with KNOWN_CSTS scalar constants. */
5293 want_remove_some_param_p (cgraph_node
*node
, vec
<tree
> known_csts
)
5295 auto_vec
<bool, 16> surviving
;
5296 bool filled_vec
= false;
5297 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5298 int i
, count
= ipa_get_param_count (info
);
5300 for (i
= 0; i
< count
; i
++)
5302 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5307 clone_info
*info
= clone_info::get (node
);
5308 if (!info
|| !info
->param_adjustments
)
5310 info
->param_adjustments
->get_surviving_params (&surviving
);
5313 if (surviving
.length() < (unsigned) i
&& surviving
[i
])
5319 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5320 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5321 redirect all edges in CALLERS to it. */
5323 static struct cgraph_node
*
5324 create_specialized_node (struct cgraph_node
*node
,
5325 vec
<tree
> known_csts
,
5326 vec
<ipa_polymorphic_call_context
> known_contexts
,
5327 vec
<ipa_argagg_value
, va_gc
> *aggvals
,
5328 vec
<cgraph_edge
*> &callers
)
5330 ipa_node_params
*new_info
, *info
= ipa_node_params_sum
->get (node
);
5331 vec
<ipa_replace_map
*, va_gc
> *replace_trees
= NULL
;
5332 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
5333 struct cgraph_node
*new_node
;
5334 int i
, count
= ipa_get_param_count (info
);
5335 clone_info
*cinfo
= clone_info::get (node
);
5336 ipa_param_adjustments
*old_adjustments
= cinfo
5337 ? cinfo
->param_adjustments
: NULL
;
5338 ipa_param_adjustments
*new_adjustments
;
5339 gcc_assert (!info
->ipcp_orig_node
);
5340 gcc_assert (node
->can_change_signature
5341 || !old_adjustments
);
5343 if (old_adjustments
)
5345 /* At the moment all IPA optimizations should use the number of
5346 parameters of the prevailing decl as the m_always_copy_start.
5347 Handling any other value would complicate the code below, so for the
5348 time bing let's only assert it is so. */
5349 gcc_assert (old_adjustments
->m_always_copy_start
== count
5350 || old_adjustments
->m_always_copy_start
< 0);
5351 int old_adj_count
= vec_safe_length (old_adjustments
->m_adj_params
);
5352 for (i
= 0; i
< old_adj_count
; i
++)
5354 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
5355 if (!node
->can_change_signature
5356 || old_adj
->op
!= IPA_PARAM_OP_COPY
5357 || (!known_csts
[old_adj
->base_index
]
5358 && ipa_is_param_used (info
, old_adj
->base_index
)))
5360 ipa_adjusted_param new_adj
= *old_adj
;
5362 new_adj
.prev_clone_adjustment
= true;
5363 new_adj
.prev_clone_index
= i
;
5364 vec_safe_push (new_params
, new_adj
);
5367 bool skip_return
= old_adjustments
->m_skip_return
;
5368 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5369 ipa_param_adjustments (new_params
, count
,
5372 else if (node
->can_change_signature
5373 && want_remove_some_param_p (node
, known_csts
))
5375 ipa_adjusted_param adj
;
5376 memset (&adj
, 0, sizeof (adj
));
5377 adj
.op
= IPA_PARAM_OP_COPY
;
5378 for (i
= 0; i
< count
; i
++)
5379 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5382 adj
.prev_clone_index
= i
;
5383 vec_safe_push (new_params
, adj
);
5385 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5386 ipa_param_adjustments (new_params
, count
, false));
5389 new_adjustments
= NULL
;
5391 auto_vec
<cgraph_edge
*, 2> self_recursive_calls
;
5392 for (i
= callers
.length () - 1; i
>= 0; i
--)
5394 cgraph_edge
*cs
= callers
[i
];
5395 if (cs
->caller
== node
)
5397 self_recursive_calls
.safe_push (cs
);
5398 callers
.unordered_remove (i
);
5401 replace_trees
= cinfo
? vec_safe_copy (cinfo
->tree_map
) : NULL
;
5402 for (i
= 0; i
< count
; i
++)
5404 tree t
= known_csts
[i
];
5408 gcc_checking_assert (TREE_CODE (t
) != TREE_BINFO
);
5410 bool load_ref
= false;
5411 symtab_node
*ref_symbol
;
5412 if (TREE_CODE (t
) == ADDR_EXPR
)
5414 tree base
= get_base_address (TREE_OPERAND (t
, 0));
5415 if (TREE_CODE (base
) == VAR_DECL
5416 && ipa_get_controlled_uses (info
, i
) == 0
5417 && ipa_get_param_load_dereferenced (info
, i
)
5418 && (ref_symbol
= symtab_node::get (base
)))
5421 if (node
->can_change_signature
)
5422 for (cgraph_edge
*caller
: callers
)
5423 adjust_references_in_caller (caller
, ref_symbol
, i
);
5427 ipa_replace_map
*replace_map
= get_replacement_map (info
, t
, i
, load_ref
);
5429 vec_safe_push (replace_trees
, replace_map
);
5432 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
5433 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5435 new_node
= node
->create_virtual_clone (callers
, replace_trees
,
5436 new_adjustments
, "constprop",
5440 bool have_self_recursive_calls
= !self_recursive_calls
.is_empty ();
5441 for (unsigned j
= 0; j
< self_recursive_calls
.length (); j
++)
5443 cgraph_edge
*cs
= get_next_cgraph_edge_clone (self_recursive_calls
[j
]);
5444 /* Cloned edges can disappear during cloning as speculation can be
5445 resolved, check that we have one and that it comes from the last
5447 if (cs
&& cs
->caller
== new_node
)
5448 cs
->redirect_callee_duplicating_thunks (new_node
);
5449 /* Any future code that would make more than one clone of an outgoing
5450 edge would confuse this mechanism, so let's check that does not
5452 gcc_checking_assert (!cs
5453 || !get_next_cgraph_edge_clone (cs
)
5454 || get_next_cgraph_edge_clone (cs
)->caller
!= new_node
);
5456 if (have_self_recursive_calls
)
5457 new_node
->expand_all_artificial_thunks ();
5459 ipa_set_node_agg_value_chain (new_node
, aggvals
);
5460 for (const ipa_argagg_value
&av
: aggvals
)
5461 new_node
->maybe_create_reference (av
.value
, NULL
);
5463 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5465 fprintf (dump_file
, " the new node is %s.\n", new_node
->dump_name ());
5466 if (known_contexts
.exists ())
5468 for (i
= 0; i
< count
; i
++)
5469 if (!known_contexts
[i
].useless_p ())
5471 fprintf (dump_file
, " known ctx %i is ", i
);
5472 known_contexts
[i
].dump (dump_file
);
5477 fprintf (dump_file
, " Aggregate replacements:");
5478 ipa_argagg_value_list
avs (aggvals
);
5479 avs
.dump (dump_file
);
5483 new_info
= ipa_node_params_sum
->get (new_node
);
5484 new_info
->ipcp_orig_node
= node
;
5485 new_node
->ipcp_clone
= true;
5486 new_info
->known_csts
= known_csts
;
5487 new_info
->known_contexts
= known_contexts
;
5489 ipcp_discover_new_direct_edges (new_node
, known_csts
, known_contexts
,
5495 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5496 pass-through function to itself when the cgraph_node involved is not an
5497 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5498 no-operation pass-through. */
5501 self_recursive_pass_through_p (cgraph_edge
*cs
, ipa_jump_func
*jfunc
, int i
,
5504 enum availability availability
;
5505 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5506 && availability
> AVAIL_INTERPOSABLE
5507 && jfunc
->type
== IPA_JF_PASS_THROUGH
5508 && (!simple
|| ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5509 && ipa_get_jf_pass_through_formal_id (jfunc
) == i
5510 && ipa_node_params_sum
->get (cs
->caller
)
5511 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5516 /* Return true if JFUNC, which describes a part of an aggregate represented or
5517 pointed to by the i-th parameter of call CS, is a pass-through function to
5518 itself when the cgraph_node involved is not an IPA-CP clone.. When
5519 SIMPLE is true, further check if JFUNC is a simple no-operation
5523 self_recursive_agg_pass_through_p (const cgraph_edge
*cs
,
5524 const ipa_agg_jf_item
*jfunc
,
5525 int i
, bool simple
= true)
5527 enum availability availability
;
5528 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5529 && availability
> AVAIL_INTERPOSABLE
5530 && jfunc
->jftype
== IPA_JF_LOAD_AGG
5531 && jfunc
->offset
== jfunc
->value
.load_agg
.offset
5532 && (!simple
|| jfunc
->value
.pass_through
.operation
== NOP_EXPR
)
5533 && jfunc
->value
.pass_through
.formal_id
== i
5534 && useless_type_conversion_p (jfunc
->value
.load_agg
.type
, jfunc
->type
)
5535 && ipa_node_params_sum
->get (cs
->caller
)
5536 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5541 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5542 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5545 find_more_scalar_values_for_callers_subset (struct cgraph_node
*node
,
5546 vec
<tree
> &known_csts
,
5547 const vec
<cgraph_edge
*> &callers
)
5549 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5550 int i
, count
= ipa_get_param_count (info
);
5552 for (i
= 0; i
< count
; i
++)
5554 struct cgraph_edge
*cs
;
5555 tree newval
= NULL_TREE
;
5558 tree type
= ipa_get_type (info
, i
);
5560 if (ipa_get_scalar_lat (info
, i
)->bottom
|| known_csts
[i
])
5563 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5565 struct ipa_jump_func
*jump_func
;
5568 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5570 || i
>= ipa_get_cs_argument_count (args
)
5572 && call_passes_through_thunk (cs
)))
5577 jump_func
= ipa_get_ith_jump_func (args
, i
);
5579 /* Besides simple pass-through jump function, arithmetic jump
5580 function could also introduce argument-direct-pass-through for
5581 self-feeding recursive call. For example,
5588 Given that i is 0, recursive propagation via (i & 1) also gets
5590 if (self_recursive_pass_through_p (cs
, jump_func
, i
, false))
5592 gcc_assert (newval
);
5593 t
= ipa_get_jf_arith_result (
5594 ipa_get_jf_pass_through_operation (jump_func
),
5596 ipa_get_jf_pass_through_operand (jump_func
),
5600 t
= ipa_value_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5604 && !values_equal_for_ipcp_p (t
, newval
))
5605 || (!first
&& !newval
))
5617 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5619 fprintf (dump_file
, " adding an extra known scalar value ");
5620 print_ipcp_constant_value (dump_file
, newval
);
5621 fprintf (dump_file
, " for ");
5622 ipa_dump_param (dump_file
, info
, i
);
5623 fprintf (dump_file
, "\n");
5626 known_csts
[i
] = newval
;
5631 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5632 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5636 find_more_contexts_for_caller_subset (cgraph_node
*node
,
5637 vec
<ipa_polymorphic_call_context
>
5639 const vec
<cgraph_edge
*> &callers
)
5641 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5642 int i
, count
= ipa_get_param_count (info
);
5644 for (i
= 0; i
< count
; i
++)
5648 if (ipa_get_poly_ctx_lat (info
, i
)->bottom
5649 || (known_contexts
->exists ()
5650 && !(*known_contexts
)[i
].useless_p ()))
5653 ipa_polymorphic_call_context newval
;
5657 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5659 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5661 || i
>= ipa_get_cs_argument_count (args
))
5663 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
5664 ipa_polymorphic_call_context ctx
;
5665 ctx
= ipa_context_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5673 newval
.meet_with (ctx
);
5674 if (newval
.useless_p ())
5678 if (!newval
.useless_p ())
5680 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5682 fprintf (dump_file
, " adding an extra known polymorphic "
5684 print_ipcp_constant_value (dump_file
, newval
);
5685 fprintf (dump_file
, " for ");
5686 ipa_dump_param (dump_file
, info
, i
);
5687 fprintf (dump_file
, "\n");
5690 if (!known_contexts
->exists ())
5691 known_contexts
->safe_grow_cleared (ipa_get_param_count (info
),
5693 (*known_contexts
)[i
] = newval
;
5699 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5700 RES. If INTERIM is non-NULL, it contains the current interim state of
5701 collected aggregate values which can be used to compute values passed over
5702 self-recursive edges.
5704 This basically one iteration of push_agg_values_from_edge over one
5705 parameter, which allows for simpler early returns. */
5708 push_agg_values_for_index_from_edge (struct cgraph_edge
*cs
, int index
,
5709 vec
<ipa_argagg_value
> *res
,
5710 const ipa_argagg_value_list
*interim
)
5712 bool agg_values_from_caller
= false;
5713 bool agg_jf_preserved
= false;
5714 unsigned unit_delta
= UINT_MAX
;
5716 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
),
5719 if (jfunc
->type
== IPA_JF_PASS_THROUGH
5720 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5722 agg_values_from_caller
= true;
5723 agg_jf_preserved
= ipa_get_jf_pass_through_agg_preserved (jfunc
);
5724 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5727 else if (jfunc
->type
== IPA_JF_ANCESTOR
5728 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
5730 agg_values_from_caller
= true;
5731 agg_jf_preserved
= true;
5732 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
5733 unit_delta
= ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
5736 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5737 if (agg_values_from_caller
)
5739 if (caller_info
->ipcp_orig_node
)
5741 struct cgraph_node
*orig_node
= caller_info
->ipcp_orig_node
;
5742 ipcp_transformation
*ts
5743 = ipcp_get_transformation_summary (cs
->caller
);
5744 ipa_node_params
*orig_info
= ipa_node_params_sum
->get (orig_node
);
5745 ipcp_param_lattices
*orig_plats
5746 = ipa_get_parm_lattices (orig_info
, src_idx
);
5749 && (agg_jf_preserved
|| !orig_plats
->aggs_by_ref
))
5751 ipa_argagg_value_list
src (ts
);
5752 src
.push_adjusted_values (src_idx
, index
, unit_delta
, res
);
5758 ipcp_param_lattices
*src_plats
5759 = ipa_get_parm_lattices (caller_info
, src_idx
);
5761 && !src_plats
->aggs_bottom
5762 && (agg_jf_preserved
|| !src_plats
->aggs_by_ref
))
5764 if (interim
&& self_recursive_pass_through_p (cs
, jfunc
, index
))
5766 interim
->push_adjusted_values (src_idx
, index
, unit_delta
,
5770 if (!src_plats
->aggs_contain_variable
)
5772 push_agg_values_from_plats (src_plats
, index
, unit_delta
,
5780 if (!jfunc
->agg
.items
)
5783 unsigned prev_unit_offset
= 0;
5784 for (const ipa_agg_jf_item
&agg_jf
: *jfunc
->agg
.items
)
5786 tree value
, srcvalue
;
5787 /* Besides simple pass-through aggregate jump function, arithmetic
5788 aggregate jump function could also bring same aggregate value as
5789 parameter passed-in for self-feeding recursive call. For example,
5797 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5799 && self_recursive_agg_pass_through_p (cs
, &agg_jf
, index
, false)
5800 && (srcvalue
= interim
->get_value(index
,
5801 agg_jf
.offset
/ BITS_PER_UNIT
)))
5802 value
= ipa_get_jf_arith_result (agg_jf
.value
.pass_through
.operation
,
5804 agg_jf
.value
.pass_through
.operand
,
5807 value
= ipa_agg_value_from_jfunc (caller_info
, cs
->caller
,
5811 struct ipa_argagg_value iav
;
5813 iav
.unit_offset
= agg_jf
.offset
/ BITS_PER_UNIT
;
5815 iav
.by_ref
= jfunc
->agg
.by_ref
;
5818 || iav
.unit_offset
> prev_unit_offset
);
5819 prev_unit_offset
= iav
.unit_offset
;
5822 res
->safe_push (iav
);
5828 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5829 description of ultimate callee of CS or the one it was cloned from (the
5830 summary where lattices are). If INTERIM is non-NULL, it contains the
5831 current interim state of collected aggregate values which can be used to
5832 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5833 is true) and to skip values which clearly will not be part of intersection
5837 push_agg_values_from_edge (struct cgraph_edge
*cs
,
5838 ipa_node_params
*dest_info
,
5839 vec
<ipa_argagg_value
> *res
,
5840 const ipa_argagg_value_list
*interim
,
5841 bool optimize_self_recursion
)
5843 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5847 int count
= MIN (ipa_get_param_count (dest_info
),
5848 ipa_get_cs_argument_count (args
));
5850 unsigned interim_index
= 0;
5851 for (int index
= 0; index
< count
; index
++)
5855 while (interim_index
< interim
->m_elts
.size ()
5856 && interim
->m_elts
[interim_index
].value
5857 && interim
->m_elts
[interim_index
].index
< index
)
5859 if (interim_index
>= interim
->m_elts
.size ()
5860 || interim
->m_elts
[interim_index
].index
> index
)
5864 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (dest_info
, index
);
5865 if (!ipa_is_param_used (dest_info
, index
)
5866 || plats
->aggs_bottom
)
5868 push_agg_values_for_index_from_edge (cs
, index
, res
,
5869 optimize_self_recursion
? interim
5875 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5876 from all of them. Return nullptr if there are none. */
5878 static struct vec
<ipa_argagg_value
, va_gc
> *
5879 find_aggregate_values_for_callers_subset (struct cgraph_node
*node
,
5880 const vec
<cgraph_edge
*> &callers
)
5882 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5883 if (dest_info
->ipcp_orig_node
)
5884 dest_info
= ipa_node_params_sum
->get (dest_info
->ipcp_orig_node
);
5886 /* gather_edges_for_value puts a non-recursive call into the first element of
5887 callers if it can. */
5888 auto_vec
<ipa_argagg_value
, 32> interim
;
5889 push_agg_values_from_edge (callers
[0], dest_info
, &interim
, NULL
, true);
5891 unsigned valid_entries
= interim
.length ();
5895 unsigned caller_count
= callers
.length();
5896 for (unsigned i
= 1; i
< caller_count
; i
++)
5898 auto_vec
<ipa_argagg_value
, 32> last
;
5899 ipa_argagg_value_list
avs (&interim
);
5900 push_agg_values_from_edge (callers
[i
], dest_info
, &last
, &avs
, true);
5902 valid_entries
= intersect_argaggs_with (interim
, last
);
5907 vec
<ipa_argagg_value
, va_gc
> *res
= NULL
;
5908 vec_safe_reserve_exact (res
, valid_entries
);
5909 for (const ipa_argagg_value
&av
: interim
)
5911 res
->quick_push(av
);
5912 gcc_checking_assert (res
->length () == valid_entries
);
5916 /* Determine whether CS also brings all scalar values that the NODE is
5920 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge
*cs
,
5921 struct cgraph_node
*node
)
5923 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5924 int count
= ipa_get_param_count (dest_info
);
5925 class ipa_node_params
*caller_info
;
5926 class ipa_edge_args
*args
;
5929 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5930 args
= ipa_edge_args_sum
->get (cs
);
5931 for (i
= 0; i
< count
; i
++)
5933 struct ipa_jump_func
*jump_func
;
5936 val
= dest_info
->known_csts
[i
];
5940 if (i
>= ipa_get_cs_argument_count (args
))
5942 jump_func
= ipa_get_ith_jump_func (args
, i
);
5943 t
= ipa_value_from_jfunc (caller_info
, jump_func
,
5944 ipa_get_type (dest_info
, i
));
5945 if (!t
|| !values_equal_for_ipcp_p (val
, t
))
5951 /* Determine whether CS also brings all aggregate values that NODE is
5955 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge
*cs
,
5956 struct cgraph_node
*node
)
5958 ipcp_transformation
*ts
= ipcp_get_transformation_summary (node
);
5959 if (!ts
|| vec_safe_is_empty (ts
->m_agg_values
))
5962 const ipa_argagg_value_list
existing (ts
->m_agg_values
);
5963 auto_vec
<ipa_argagg_value
, 32> edge_values
;
5964 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5965 gcc_checking_assert (dest_info
->ipcp_orig_node
);
5966 dest_info
= ipa_node_params_sum
->get (dest_info
->ipcp_orig_node
);
5967 push_agg_values_from_edge (cs
, dest_info
, &edge_values
, &existing
, false);
5968 const ipa_argagg_value_list
avl (&edge_values
);
5969 return avl
.superset_of_p (existing
);
5972 /* Given an original NODE and a VAL for which we have already created a
5973 specialized clone, look whether there are incoming edges that still lead
5974 into the old node but now also bring the requested value and also conform to
5975 all other criteria such that they can be redirected the special node.
5976 This function can therefore redirect the final edge in a SCC. */
5978 template <typename valtype
>
5980 perhaps_add_new_callers (cgraph_node
*node
, ipcp_value
<valtype
> *val
)
5982 ipcp_value_source
<valtype
> *src
;
5983 profile_count redirected_sum
= profile_count::zero ();
5985 for (src
= val
->sources
; src
; src
= src
->next
)
5987 struct cgraph_edge
*cs
= src
->cs
;
5990 if (cgraph_edge_brings_value_p (cs
, src
, node
, val
)
5991 && cgraph_edge_brings_all_scalars_for_node (cs
, val
->spec_node
)
5992 && cgraph_edge_brings_all_agg_vals_for_node (cs
, val
->spec_node
))
5995 fprintf (dump_file
, " - adding an extra caller %s of %s\n",
5996 cs
->caller
->dump_name (),
5997 val
->spec_node
->dump_name ());
5999 cs
->redirect_callee_duplicating_thunks (val
->spec_node
);
6000 val
->spec_node
->expand_all_artificial_thunks ();
6001 if (cs
->count
.ipa ().initialized_p ())
6002 redirected_sum
= redirected_sum
+ cs
->count
.ipa ();
6004 cs
= get_next_cgraph_edge_clone (cs
);
6008 if (redirected_sum
.nonzero_p ())
6009 update_specialized_profile (val
->spec_node
, node
, redirected_sum
);
6012 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
6015 known_contexts_useful_p (vec
<ipa_polymorphic_call_context
> known_contexts
)
6017 ipa_polymorphic_call_context
*ctx
;
6020 FOR_EACH_VEC_ELT (known_contexts
, i
, ctx
)
6021 if (!ctx
->useless_p ())
6026 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
6028 static vec
<ipa_polymorphic_call_context
>
6029 copy_useful_known_contexts (const vec
<ipa_polymorphic_call_context
> &known_contexts
)
6031 if (known_contexts_useful_p (known_contexts
))
6032 return known_contexts
.copy ();
6037 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6038 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
6042 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
6043 vec
<tree
> *known_csts
,
6044 vec
<ipa_polymorphic_call_context
> *known_contexts
,
6045 ipcp_value
<tree
> *val
, int index
)
6047 *known_csts
= avals
->m_known_vals
.copy ();
6048 *known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
6049 (*known_csts
)[index
] = val
->value
;
6052 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6053 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6057 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
6058 vec
<tree
> *known_csts
,
6059 vec
<ipa_polymorphic_call_context
> *known_contexts
,
6060 ipcp_value
<ipa_polymorphic_call_context
> *val
,
6063 *known_csts
= avals
->m_known_vals
.copy ();
6064 *known_contexts
= avals
->m_known_contexts
.copy ();
6065 (*known_contexts
)[index
] = val
->value
;
6068 /* Return true if OFFSET indicates this was not an aggregate value or there is
6069 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6073 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *aggvals
,
6074 int index
, HOST_WIDE_INT offset
, tree value
)
6079 const ipa_argagg_value_list
avl (aggvals
);
6080 tree v
= avl
.get_value (index
, offset
/ BITS_PER_UNIT
);
6081 return v
&& values_equal_for_ipcp_p (v
, value
);
6084 /* Return true if offset is minus one because source of a polymorphic context
6085 cannot be an aggregate value. */
6088 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *,
6089 int , HOST_WIDE_INT offset
,
6090 ipa_polymorphic_call_context
)
6092 return offset
== -1;
6095 /* Decide whether to create a special version of NODE for value VAL of
6096 parameter at the given INDEX. If OFFSET is -1, the value is for the
6097 parameter itself, otherwise it is stored at the given OFFSET of the
6098 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6099 is a vector which contains clones created for self-recursive calls with an
6100 arithmetic pass-through jump function. */
6102 template <typename valtype
>
6104 decide_about_value (struct cgraph_node
*node
, int index
, HOST_WIDE_INT offset
,
6105 ipcp_value
<valtype
> *val
, ipa_auto_call_arg_values
*avals
,
6106 vec
<cgraph_node
*> *self_gen_clones
)
6110 profile_count count_sum
, rec_count_sum
;
6111 vec
<cgraph_edge
*> callers
;
6115 perhaps_add_new_callers (node
, val
);
6118 else if (val
->local_size_cost
+ overall_size
> get_max_overall_size (node
))
6120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6121 fprintf (dump_file
, " Ignoring candidate value because "
6122 "maximum unit size would be reached with %li.\n",
6123 val
->local_size_cost
+ overall_size
);
6126 else if (!get_info_about_necessary_edges (val
, node
, &freq_sum
, &caller_count
,
6127 &rec_count_sum
, &count_sum
))
6130 if (!dbg_cnt (ipa_cp_values
))
6133 if (val
->self_recursion_generated_p ())
6135 /* The edge counts in this case might not have been adjusted yet.
6136 Nevertleless, even if they were it would be only a guesswork which we
6137 can do now. The recursive part of the counts can be derived from the
6138 count of the original node anyway. */
6139 if (node
->count
.ipa ().nonzero_p ())
6141 unsigned dem
= self_gen_clones
->length () + 1;
6142 rec_count_sum
= node
->count
.ipa () / dem
;
6145 rec_count_sum
= profile_count::zero ();
6148 /* get_info_about_necessary_edges only sums up ipa counts. */
6149 count_sum
+= rec_count_sum
;
6151 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6153 fprintf (dump_file
, " - considering value ");
6154 print_ipcp_constant_value (dump_file
, val
->value
);
6155 fprintf (dump_file
, " for ");
6156 ipa_dump_param (dump_file
, ipa_node_params_sum
->get (node
), index
);
6158 fprintf (dump_file
, ", offset: " HOST_WIDE_INT_PRINT_DEC
, offset
);
6159 fprintf (dump_file
, " (caller_count: %i)\n", caller_count
);
6162 if (!good_cloning_opportunity_p (node
, val
->local_time_benefit
,
6163 freq_sum
, count_sum
,
6164 val
->local_size_cost
)
6165 && !good_cloning_opportunity_p (node
, val
->prop_time_benefit
,
6166 freq_sum
, count_sum
, val
->prop_size_cost
))
6170 fprintf (dump_file
, " Creating a specialized node of %s.\n",
6171 node
->dump_name ());
6173 vec
<tree
> known_csts
;
6174 vec
<ipa_polymorphic_call_context
> known_contexts
;
6176 callers
= gather_edges_for_value (val
, node
, caller_count
);
6178 copy_known_vectors_add_val (avals
, &known_csts
, &known_contexts
, val
, index
);
6181 known_csts
= avals
->m_known_vals
.copy ();
6182 known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
6184 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6185 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6186 vec
<ipa_argagg_value
, va_gc
> *aggvals
6187 = find_aggregate_values_for_callers_subset (node
, callers
);
6188 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals
, index
,
6189 offset
, val
->value
));
6190 val
->spec_node
= create_specialized_node (node
, known_csts
, known_contexts
,
6193 if (val
->self_recursion_generated_p ())
6194 self_gen_clones
->safe_push (val
->spec_node
);
6196 update_profiling_info (node
, val
->spec_node
);
6199 overall_size
+= val
->local_size_cost
;
6200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6201 fprintf (dump_file
, " overall size reached %li\n",
6204 /* TODO: If for some lattice there is only one other known value
6205 left, make a special node for it too. */
6210 /* Like irange::contains_p(), but convert VAL to the range of R if
6214 ipa_range_contains_p (const vrange
&r
, tree val
)
6216 if (r
.undefined_p ())
6219 tree type
= r
.type ();
6220 if (!wi::fits_to_tree_p (wi::to_wide (val
), type
))
6223 val
= fold_convert (type
, val
);
6224 return r
.contains_p (val
);
6227 /* Decide whether and what specialized clones of NODE should be created. */
6230 decide_whether_version_node (struct cgraph_node
*node
)
6232 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6233 int i
, count
= ipa_get_param_count (info
);
6239 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6240 fprintf (dump_file
, "\nEvaluating opportunities for %s.\n",
6241 node
->dump_name ());
6243 auto_vec
<cgraph_node
*, 9> self_gen_clones
;
6244 ipa_auto_call_arg_values avals
;
6245 gather_context_independent_values (info
, &avals
, false, NULL
);
6247 for (i
= 0; i
< count
;i
++)
6249 if (!ipa_is_param_used (info
, i
))
6252 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6253 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
6254 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
6257 && !avals
.m_known_vals
[i
])
6259 ipcp_value
<tree
> *val
;
6260 for (val
= lat
->values
; val
; val
= val
->next
)
6262 /* If some values generated for self-recursive calls with
6263 arithmetic jump functions fall outside of the known
6264 value_range for the parameter, we can skip them. VR interface
6265 supports this only for integers now. */
6266 if (TREE_CODE (val
->value
) == INTEGER_CST
6267 && !plats
->m_value_range
.bottom_p ()
6268 && !ipa_range_contains_p (plats
->m_value_range
.m_vr
,
6271 /* This can happen also if a constant present in the source
6272 code falls outside of the range of parameter's type, so we
6274 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6276 fprintf (dump_file
, " - skipping%s value ",
6277 val
->self_recursion_generated_p ()
6278 ? " self_recursion_generated" : "");
6279 print_ipcp_constant_value (dump_file
, val
->value
);
6280 fprintf (dump_file
, " because it is outside known "
6285 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6290 if (!plats
->aggs_bottom
)
6292 struct ipcp_agg_lattice
*aglat
;
6293 ipcp_value
<tree
> *val
;
6294 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
6295 if (!aglat
->bottom
&& aglat
->values
6296 /* If the following is false, the one value has been considered
6297 for cloning for all contexts. */
6298 && (plats
->aggs_contain_variable
6299 || !aglat
->is_single_const ()))
6300 for (val
= aglat
->values
; val
; val
= val
->next
)
6301 ret
|= decide_about_value (node
, i
, aglat
->offset
, val
, &avals
,
6306 && avals
.m_known_contexts
[i
].useless_p ())
6308 ipcp_value
<ipa_polymorphic_call_context
> *val
;
6309 for (val
= ctxlat
->values
; val
; val
= val
->next
)
6310 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6315 if (!self_gen_clones
.is_empty ())
6317 self_gen_clones
.safe_push (node
);
6318 update_counts_for_self_gen_clones (node
, self_gen_clones
);
6321 if (info
->do_clone_for_all_contexts
)
6323 if (!dbg_cnt (ipa_cp_values
))
6325 info
->do_clone_for_all_contexts
= false;
6329 struct cgraph_node
*clone
;
6330 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
6332 for (int i
= callers
.length () - 1; i
>= 0; i
--)
6334 cgraph_edge
*cs
= callers
[i
];
6335 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6337 if (caller_info
&& caller_info
->node_dead
)
6338 callers
.unordered_remove (i
);
6341 if (!adjust_callers_for_value_intersection (callers
, node
))
6343 /* If node is not called by anyone, or all its caller edges are
6344 self-recursive, the node is not really in use, no need to do
6346 info
->do_clone_for_all_contexts
= false;
6351 fprintf (dump_file
, " - Creating a specialized node of %s "
6352 "for all known contexts.\n", node
->dump_name ());
6354 vec
<tree
> known_csts
= avals
.m_known_vals
.copy ();
6355 vec
<ipa_polymorphic_call_context
> known_contexts
6356 = copy_useful_known_contexts (avals
.m_known_contexts
);
6357 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6358 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6359 vec
<ipa_argagg_value
, va_gc
> *aggvals
6360 = find_aggregate_values_for_callers_subset (node
, callers
);
6362 if (!known_contexts_useful_p (known_contexts
))
6364 known_contexts
.release ();
6365 known_contexts
= vNULL
;
6367 clone
= create_specialized_node (node
, known_csts
, known_contexts
,
6369 info
->do_clone_for_all_contexts
= false;
6370 ipa_node_params_sum
->get (clone
)->is_all_contexts_clone
= true;
6377 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6380 spread_undeadness (struct cgraph_node
*node
)
6382 struct cgraph_edge
*cs
;
6384 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
6385 if (ipa_edge_within_scc (cs
))
6387 struct cgraph_node
*callee
;
6388 class ipa_node_params
*info
;
6390 callee
= cs
->callee
->function_symbol (NULL
);
6391 info
= ipa_node_params_sum
->get (callee
);
6393 if (info
&& info
->node_dead
)
6395 info
->node_dead
= 0;
6396 spread_undeadness (callee
);
6401 /* Return true if NODE has a caller from outside of its SCC that is not
6402 dead. Worker callback for cgraph_for_node_and_aliases. */
6405 has_undead_caller_from_outside_scc_p (struct cgraph_node
*node
,
6406 void *data ATTRIBUTE_UNUSED
)
6408 struct cgraph_edge
*cs
;
6410 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
6411 if (cs
->caller
->thunk
6412 && cs
->caller
->call_for_symbol_thunks_and_aliases
6413 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6415 else if (!ipa_edge_within_scc (cs
))
6417 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6418 if (!caller_info
/* Unoptimized caller are like dead ones. */
6419 || !caller_info
->node_dead
)
6426 /* Identify nodes within the same SCC as NODE which are no longer needed
6427 because of new clones and will be removed as unreachable. */
6430 identify_dead_nodes (struct cgraph_node
*node
)
6432 struct cgraph_node
*v
;
6433 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6436 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6438 && !v
->call_for_symbol_thunks_and_aliases
6439 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6440 info
->node_dead
= 1;
6443 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6445 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6446 if (info
&& !info
->node_dead
)
6447 spread_undeadness (v
);
6450 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6452 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6453 if (ipa_node_params_sum
->get (v
)
6454 && ipa_node_params_sum
->get (v
)->node_dead
)
6455 fprintf (dump_file
, " Marking node as dead: %s.\n",
6460 /* The decision stage. Iterate over the topological order of call graph nodes
6461 TOPO and make specialized clones if deemed beneficial. */
6464 ipcp_decision_stage (class ipa_topo_info
*topo
)
6469 fprintf (dump_file
, "\nIPA decision stage:\n\n");
6471 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
6473 struct cgraph_node
*node
= topo
->order
[i
];
6474 bool change
= false, iterate
= true;
6478 struct cgraph_node
*v
;
6480 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6481 if (v
->has_gimple_body_p ()
6482 && ipcp_versionable_function_p (v
))
6483 iterate
|= decide_whether_version_node (v
);
6488 identify_dead_nodes (node
);
6492 /* Look up all the bits information that we have discovered and copy it over
6493 to the transformation summary. */
6496 ipcp_store_bits_results (void)
6500 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6502 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6503 bool dumped_sth
= false;
6504 bool found_useful_result
= false;
6506 if (!opt_for_fn (node
->decl
, flag_ipa_bit_cp
) || !info
)
6509 fprintf (dump_file
, "Not considering %s for ipa bitwise propagation "
6510 "; -fipa-bit-cp: disabled.\n",
6511 node
->dump_name ());
6515 if (info
->ipcp_orig_node
)
6516 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6517 if (!info
->lattices
)
6518 /* Newly expanded artificial thunks do not have lattices. */
6521 unsigned count
= ipa_get_param_count (info
);
6522 for (unsigned i
= 0; i
< count
; i
++)
6524 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6525 if (plats
->bits_lattice
.constant_p ())
6527 found_useful_result
= true;
6532 if (!found_useful_result
)
6535 ipcp_transformation_initialize ();
6536 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6537 vec_safe_reserve_exact (ts
->bits
, count
);
6539 for (unsigned i
= 0; i
< count
; i
++)
6541 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6544 if (plats
->bits_lattice
.constant_p ())
6547 = ipa_get_ipa_bits_for_value (plats
->bits_lattice
.get_value (),
6548 plats
->bits_lattice
.get_mask ());
6549 if (!dbg_cnt (ipa_cp_bits
))
6555 ts
->bits
->quick_push (jfbits
);
6556 if (!dump_file
|| !jfbits
)
6560 fprintf (dump_file
, "Propagated bits info for function %s:\n",
6561 node
->dump_name ());
6564 fprintf (dump_file
, " param %i: value = ", i
);
6565 print_hex (jfbits
->value
, dump_file
);
6566 fprintf (dump_file
, ", mask = ");
6567 print_hex (jfbits
->mask
, dump_file
);
6568 fprintf (dump_file
, "\n");
6573 /* Look up all VR information that we have discovered and copy it over
6574 to the transformation summary. */
6577 ipcp_store_vr_results (void)
6581 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6583 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6584 bool found_useful_result
= false;
6586 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_vrp
))
6589 fprintf (dump_file
, "Not considering %s for VR discovery "
6590 "and propagate; -fipa-ipa-vrp: disabled.\n",
6591 node
->dump_name ());
6595 if (info
->ipcp_orig_node
)
6596 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6597 if (!info
->lattices
)
6598 /* Newly expanded artificial thunks do not have lattices. */
6601 unsigned count
= ipa_get_param_count (info
);
6602 for (unsigned i
= 0; i
< count
; i
++)
6604 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6605 if (!plats
->m_value_range
.bottom_p ()
6606 && !plats
->m_value_range
.top_p ())
6608 found_useful_result
= true;
6612 if (!found_useful_result
)
6615 ipcp_transformation_initialize ();
6616 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6617 vec_safe_reserve_exact (ts
->m_vr
, count
);
6619 for (unsigned i
= 0; i
< count
; i
++)
6621 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6623 if (!plats
->m_value_range
.bottom_p ()
6624 && !plats
->m_value_range
.top_p ()
6625 && dbg_cnt (ipa_cp_vr
))
6627 ipa_vr
vr (plats
->m_value_range
.m_vr
);
6628 ts
->m_vr
->quick_push (vr
);
6633 ts
->m_vr
->quick_push (vr
);
6639 /* The IPCP driver. */
6644 class ipa_topo_info topo
;
6646 if (edge_clone_summaries
== NULL
)
6647 edge_clone_summaries
= new edge_clone_summary_t (symtab
);
6649 ipa_check_create_node_params ();
6650 ipa_check_create_edge_args ();
6651 clone_num_suffixes
= new hash_map
<const char *, unsigned>;
6655 fprintf (dump_file
, "\nIPA structures before propagation:\n");
6656 if (dump_flags
& TDF_DETAILS
)
6657 ipa_print_all_params (dump_file
);
6658 ipa_print_all_jump_functions (dump_file
);
6661 /* Topological sort. */
6662 build_toporder_info (&topo
);
6663 /* Do the interprocedural propagation. */
6664 ipcp_propagate_stage (&topo
);
6665 /* Decide what constant propagation and cloning should be performed. */
6666 ipcp_decision_stage (&topo
);
6667 /* Store results of bits propagation. */
6668 ipcp_store_bits_results ();
6669 /* Store results of value range propagation. */
6670 ipcp_store_vr_results ();
6672 /* Free all IPCP structures. */
6673 delete clone_num_suffixes
;
6674 free_toporder_info (&topo
);
6675 delete edge_clone_summaries
;
6676 edge_clone_summaries
= NULL
;
6677 ipa_free_all_structures_after_ipa_cp ();
6679 fprintf (dump_file
, "\nIPA constant propagation end\n");
6683 /* Initialization and computation of IPCP data structures. This is the initial
6684 intraprocedural analysis of functions, which gathers information to be
6685 propagated later on. */
6688 ipcp_generate_summary (void)
6690 struct cgraph_node
*node
;
6693 fprintf (dump_file
, "\nIPA constant propagation start:\n");
6694 ipa_register_cgraph_hooks ();
6696 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6697 ipa_analyze_node (node
);
6702 const pass_data pass_data_ipa_cp
=
6704 IPA_PASS
, /* type */
6706 OPTGROUP_NONE
, /* optinfo_flags */
6707 TV_IPA_CONSTANT_PROP
, /* tv_id */
6708 0, /* properties_required */
6709 0, /* properties_provided */
6710 0, /* properties_destroyed */
6711 0, /* todo_flags_start */
6712 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
6715 class pass_ipa_cp
: public ipa_opt_pass_d
6718 pass_ipa_cp (gcc::context
*ctxt
)
6719 : ipa_opt_pass_d (pass_data_ipa_cp
, ctxt
,
6720 ipcp_generate_summary
, /* generate_summary */
6721 NULL
, /* write_summary */
6722 NULL
, /* read_summary */
6723 ipcp_write_transformation_summaries
, /*
6724 write_optimization_summary */
6725 ipcp_read_transformation_summaries
, /*
6726 read_optimization_summary */
6727 NULL
, /* stmt_fixup */
6728 0, /* function_transform_todo_flags_start */
6729 ipcp_transform_function
, /* function_transform */
6730 NULL
) /* variable_transform */
6733 /* opt_pass methods: */
6734 bool gate (function
*) final override
6736 /* FIXME: We should remove the optimize check after we ensure we never run
6737 IPA passes when not optimizing. */
6738 return (flag_ipa_cp
&& optimize
) || in_lto_p
;
6741 unsigned int execute (function
*) final override
{ return ipcp_driver (); }
6743 }; // class pass_ipa_cp
6748 make_pass_ipa_cp (gcc::context
*ctxt
)
6750 return new pass_ipa_cp (ctxt
);
6753 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6754 within the same process. For use by toplev::finalize. */
6757 ipa_cp_cc_finalize (void)
6759 base_count
= profile_count::uninitialized ();
6761 orig_overall_size
= 0;
6762 ipcp_free_transformation_sum ();