/* Interprocedural constant propagation
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and cost size
      growth is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.
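
   As an editorial illustration of the two goals above (this example is not
   part of the original comment), consider

     static int mul (int x, int y) { return x * y; }
     int f (int a) { return mul (a, 8); }

   If every call to mul passes 8 as the second argument, goal 1) lets the
   pass rewrite mul itself so that later optimizations can turn the
   multiplication into a shift.  If only some call sites pass the constant,
   goal 2) may still create a specialized clone of mul just for them,
   provided the estimated speedup outweighs the size growth.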

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
		  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.
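
   For instance (an editorial sketch, not part of the original comment), in

     extern int opaque (void);
     extern void h (int, int, int);
     void g (int a) { h (a + 1, 7, opaque ()); }

   the call to h gets a pass-through jump function with operation PLUS_EXPR
   (operand 1) for the first argument, a constant jump function holding 7
   for the second one and an unknown jump function for the third.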

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original Callahan's paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */
#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"
template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0, low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
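
/* An editorial note on the states defined above: if one caller passes 3 and
   another passes 5, the lattice holds two ipcp_value entries; if a further
   caller passes something unknown, contains_variable is set in addition.
   TOP only occurs before any propagation has reached the parameter, and
   setting BOTTOM deliberately keeps the values list around, because
   ipcp_value_source records may still reference its entries.  */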
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */
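
/* A worked illustration of the representation above (editorial): after the
   two calls in the example are met, the lattice of 'x' holds m_mask == 0xff
   and m_value == 0, meaning every bit from bit 8 upwards is known to be
   zero while the low byte is unknown; m_value & ~m_mask then yields the
   values of all known bits.  */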

class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  Value_Range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const vrange &p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init (tree type);
  void print (FILE * f);

private:
  bool meet_with_1 (const vrange &other_vr);
};

inline void
ipcp_vr_lattice::init (tree type)
{
  if (type)
    m_vr.set_type (type);

  // Otherwise m_vr will default to unsupported_range.
}
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic context of the Ith
   formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}
/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
      && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
    return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
	   || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			       DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  return operand_equal_p (x, y, 0);
}
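
/* An editorial example: two ADDR_EXPRs taking the addresses of distinct
   constant-pool decls still compare equal above as long as their
   DECL_INITIALs are operand_equal_p, so duplicated constants do not
   needlessly split lattice values.  */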
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, "               ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
/* Print bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, "         ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, "        virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, "        AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, "        AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that edges
   from ITSELF are counted separately.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
      }
  return false;
}
/* Return true if this NODE is viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}
/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}
/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}
/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}
/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
/* Meet operation, similar to ccp_lattice_meet; we xor the values so that if
   this->value and value differ at some bit position, that bit is dropped to
   varying.  Return true if the mask has changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
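
/* A small numeric illustration (editorial, not from the original sources):
   meeting <value = 0b100, mask = 0> with <value = 0b110, mask = 0> xors the
   values to 0b010, so the result is <value = 0b100, mask = 0b010> - bit 1
   has become unknown while bits 0 and 2 remain known.  */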
/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }
  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }
  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}
/* Dump the contents of the list to FILE.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
	       av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
	fprintf (f, "(by_ref)");
      if (av.killed)
	fprintf (f, "(killed)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr.  */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}
/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
			[] (const ipa_argagg_value &elt,
			    const ipa_argagg_value &val)
    {
      if (elt.index < val.index)
	return true;
      if (elt.index > val.index)
	return false;
      if (elt.unit_offset < val.unit_offset)
	return true;
      return false;
    });

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
		  || prev_index < av.index
		  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
	  && av.unit_offset == unit_offset)
	slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
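
/* An editorial illustration: in a list dumped as "0[0]=1, 0[4]=2, 2[0]=3"
   the elements are sorted lexicographically by (index, unit_offset), which
   is exactly the invariant the binary search above relies on; get_elt (0, 4)
   finds the second element whereas get_elt (1, 0) returns NULL.  */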
/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
			[] (const ipa_argagg_value &elt, unsigned idx)
    {
      return elt.index < idx;
    });
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}
/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
				  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}
/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
	     && (m_elts[j].index < other_index
		 || (m_elts[j].index == other_index
		     && m_elts[j].unit_offset < other_offset)))
	j++;

      if (j >= m_elts.size ()
	  || m_elts[j].index != other_index
	  || m_elts[j].unit_offset != other_offset
	  || m_elts[j].by_ref != other.m_elts[i].by_ref
	  || !m_elts[j].value
	  || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
	return false;
    }
  return true;
}
/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
					     unsigned dest_index,
					     unsigned unit_delta,
					     vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
	return;
      if (av->index == src_index
	  && (av->unit_offset >= unit_delta)
	  && av->value)
	{
	  ipa_argagg_value new_av;
	  gcc_checking_assert (av->value);
	  new_av.value = av->value;
	  new_av.unit_offset = av->unit_offset - unit_delta;
	  new_av.index = dest_index;
	  new_av.by_ref = av->by_ref;
	  gcc_assert (!av->killed);
	  new_av.killed = false;

	  /* Quick check that the offsets we push are indeed increasing.  */
	  gcc_assert (first
		      || new_av.unit_offset > prev_unit_offset);
	  prev_unit_offset = new_av.unit_offset;
	  first = false;

	  res->safe_push (new_av);
	}
    }
}
/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
			    unsigned unit_delta,
			    vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
	&& (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
	ipa_argagg_value iav;
	iav.value = aglat->values->value;
	iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
	iav.index = dest_index;
	iav.by_ref = plats->aggs_by_ref;
	iav.killed = false;

	gcc_assert (first
		    || iav.unit_offset > prev_unit_offset);
	prev_unit_offset = iav.unit_offset;
	first = false;

	pushed_sth = true;
	res->safe_push (iav);
      }
  return pushed_sth;
}
/* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
			const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
	continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
	     && (other[j].index < this_index
		 || (other[j].index == this_index
		     && other[j].unit_offset < this_offset)))
	j++;

      if (j >= other.length ())
	{
	  elts[i].value = NULL_TREE;
	  continue;
	}

      if (other[j].index == this_index
	  && other[j].unit_offset == this_offset
	  && other[j].by_ref == elts[i].by_ref
	  && other[j].value
	  && values_equal_for_ipcp_p (other[j].value, elts[i].value))
	valid_entries++;
      else
	elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}
/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
	info->node_calling_single_call = true;
	return true;
      }
  return false;
}
/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, "  Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  first = false;
		  fprintf (dump_file,
			   "  The following parameters are dead on arrival:");
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
	  || !ipa_get_type (info, i)
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.init (type);
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init (type);
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}
/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
	return input;
      else
	return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
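
/* An editorial illustration: for void g (int x) { h (x + 4); } where x is
   known to be 3, the pass-through jump function records PLUS_EXPR with
   operand 4 and this function folds the two constants into the
   interprocedural invariant 7 that can then be propagated into h.  */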
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
				  input,
				  ipa_get_jf_pass_through_operand (jfunc),
				  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (!info->lattices
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (!info->lattices
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}
/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range that is not VARYING nor UNDEFINED.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const vrange &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
    return false;

  range_op_handler handler (operation);
  if (!handler)
    return false;

  Value_Range varying (dst_type);
  varying.set_varying (dst_type);

  return (handler.operand_check_p (dst_type, src_type, dst_type)
	  && handler.fold_range (dst_vr, dst_type, src_vr, varying)
	  && !dst_vr.varying_p ()
	  && !dst_vr.undefined_p ());
}
/* Same as above, but the SRC_VR argument is an IPA_VR which must
   first be extracted onto a vrange.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const ipa_vr &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  Value_Range tmp;
  src_vr.get_vrange (tmp);
  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
					    dst_type, src_type);
}
/* Determine range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

void
ipa_value_range_from_jfunc (vrange &vr,
			    ipa_node_params *info, cgraph_edge *cs,
			    ipa_jump_func *jfunc, tree parm_type)
{
  vr.set_undefined ();

  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (vr,
				       *jfunc->m_vr,
				       NOP_EXPR, parm_type,
				       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return;

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
	= ipcp_get_transformation_summary (cs->caller->inlined_to
					   ? cs->caller->inlined_to
					   : cs->caller);
      if (!sum || !sum->m_vr)
	return;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known_p ())
	return;
      tree vr_type = ipa_get_type (info, idx);
      Value_Range srcvr;
      (*sum->m_vr)[idx].get_vrange (srcvr);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
	  Value_Range res (vr_type);

	  if (ipa_vr_operation_and_type_effects (res,
						 srcvr,
						 operation, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
      else
	{
	  Value_Range op_res (vr_type);
	  Value_Range res (vr_type);
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  Value_Range op_vr (vr_type);
	  range_op_handler handler (operation);

	  ipa_range_set_and_normalize (op_vr, op);

	  if (!handler
	      || !op_res.supports_type_p (vr_type)
	      || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
	    op_res.set_varying (vr_type);

	  if (ipa_vr_operation_and_type_effects (res,
						 op_res,
						 NOP_EXPR, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
    }
}
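
/* An editorial illustration: for void g (int x) { h (x + 1); } where the
   recorded value range of x is [0, 9], the binary-operation path above folds
   the range through PLUS_EXPR with a singleton operand range of 1 and
   intersects the converted result into VR, giving h's parameter the range
   [1, 10].  */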
/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describes the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
			  const ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0
      || item->jftype == IPA_JF_UNKNOWN
      || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
	value = info->known_csts[src_idx];
      else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
	{
	  ipa_argagg_value_list avl (ts);
	  value = avl.get_value (src_idx,
				 item->value.load_agg.offset / BITS_PER_UNIT,
				 item->value.load_agg.by_ref);
	}
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
	= ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
	{
	  struct ipcp_lattice<tree> *lat = &src_plats->itself;

	  if (!lat->is_single_const ())
	    return NULL_TREE;

	  value = lat->values->value;
	}
      else if (src_plats->aggs
	       && !src_plats->aggs_bottom
	       && !src_plats->aggs_contain_variable
	       && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
	{
	  struct ipcp_agg_lattice *aglat;

	  for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
	    {
	      if (aglat->offset > item->value.load_agg.offset)
		break;

	      if (aglat->offset == item->value.load_agg.offset)
		{
		  if (aglat->is_single_const ())
		    value = aglat->values->value;
		  break;
		}
	    }
	}
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
	return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
				  value,
				  item->value.pass_through.operand,
				  item->type);
}
/* Process all items in AGG_JFUNC relative to caller (or the node the original
   caller is inlined to) NODE which is described by INFO and push the results
   to RES as describing values passed in parameter DST_INDEX.  */

void
ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
				ipa_agg_jump_function *agg_jfunc,
				unsigned dst_index,
				vec<ipa_argagg_value> *res)
{
  unsigned prev_unit_offset = 0;
  bool first = true;

  for (const ipa_agg_jf_item &item : agg_jfunc->items)
    {
      tree value = ipa_agg_value_from_jfunc (info, node, &item);
      if (!value)
	continue;

      ipa_argagg_value iav;
      iav.value = value;
      iav.unit_offset = item.offset / BITS_PER_UNIT;
      iav.index = dst_index;
      iav.by_ref = agg_jfunc->by_ref;
      iav.killed = false;

      gcc_assert (first
		  || iav.unit_offset > prev_unit_offset);
      prev_unit_offset = iav.unit_offset;
      first = false;

      res->safe_push (iav);
    }
}
/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

static void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
	  || !opt_for_fn (node->decl, optimize))
	continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}
/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset within an
   aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}
/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   CST and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = cst;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}

/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
			      unsigned same_lat_gen_level)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
	ipcp_value<ipa_polymorphic_call_context>();
  val->value = ctx;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records the address of the existing or newly
   added ipcp_value.

   If the value is generated for a self-recursive call as a result of an
   arithmetic pass-through jump-function acting on a value in the same lattice,
   SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
   zero.  If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
				  int src_idx, HOST_WIDE_INT offset,
				  ipcp_value<valtype> **val_p,
				  unsigned same_lat_gen_level)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
	if (val_p)
	  *val_p = val;

	if (val->self_recursion_generated_level < same_lat_gen_level)
	  val->self_recursion_generated_level = same_lat_gen_level;

	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source<valtype> *s;
	    for (s = val->sources; s; s = s->next)
	      if (s->cs == cs && s->val == src_val)
		break;
	    if (s)
	      return false;
	  }

	val->add_source (cs, src_val, src_idx, offset);
	return false;
      }

  if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
					      param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
	{
	  while (val->sources)
	    {
	      ipcp_value_source<valtype> *src = val->sources;
	      val->sources = src->next;
	      ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
	    }
	}
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to the end of the value list, which can reduce the
     number of iterations of the propagation stage for recursive functions.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL.  If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL.  If the operation is binary, OPND2 is a
   constant value acting as its second operand.  If non-NULL, RES_TYPE is the
   expected type of the result.  */

static tree
get_val_across_arith_op (enum tree_code opcode,
			 tree opnd1_type,
			 tree opnd2,
			 ipcp_value<tree> *src_val,
			 tree res_type)
{
  tree opnd1 = src_val->value;

  /* Skip source values that are incompatible with the specified type.  */
  if (opnd1_type
      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
    return NULL_TREE;

  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is the expected type for the values in
   SRC_LAT.  OPND2 is a constant value if the transformation is a binary
   operation.  SRC_OFFSET specifies the offset in an aggregate if SRC_LAT
   describes the lattice of a part of the aggregate.  SRC_IDX is the index of
   the source parameter.  RES_TYPE is the value type of the result being
   propagated into.  Return true if DEST_LAT changed.  */

static bool
propagate_vals_across_arith_jfunc (cgraph_edge *cs,
				   enum tree_code opcode,
				   tree opnd1_type,
				   tree opnd2,
				   ipcp_lattice<tree> *src_lat,
				   ipcp_lattice<tree> *dest_lat,
				   HOST_WIDE_INT src_offset,
				   int src_idx,
				   tree res_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* Due to circular dependencies, propagating within an SCC through arithmetic
     transformations would create an infinite number of values.  But for
     self-feeding recursive functions, we can allow propagation up to a limited
     count, and this can enable a simple kind of recursive function versioning.
     For other scenarios, we just make the lattices bottom.  */
  if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
    {
      int i;

      int max_recursive_depth = opt_for_fn(cs->caller->decl,
					   param_ipa_cp_max_recursive_depth);
      if (src_lat != dest_lat || max_recursive_depth < 1)
	return dest_lat->set_contains_variable ();

      /* No benefit if recursive execution is in low probability.  */
      if (cs->sreal_frequency () * 100
	  <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
				       param_ipa_cp_min_recursive_probability))
	return dest_lat->set_contains_variable ();

      auto_vec<ipcp_value<tree> *, 8> val_seeds;

      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  /* Now we do not use self-recursively generated values as propagation
	     sources; this is absolutely conservative, but avoids explosion
	     of the lattice's value space, especially when one recursive
	     function calls another recursive one.  */
	  if (src_val->self_recursion_generated_p ())
	    {
	      ipcp_value_source<tree> *s;

	      /* If the lattice has already been propagated for the call site,
		 no need to do that again.  */
	      for (s = src_val->sources; s; s = s->next)
		if (s->cs == cs)
		  return dest_lat->set_contains_variable ();
	    }
	  else
	    val_seeds.safe_push (src_val);
	}

      gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);

      /* Recursively generate lattice values with a limited count.  */
      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
	{
	  for (int j = 1; j < max_recursive_depth; j++)
	    {
	      tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
						     src_val, res_type);
	      if (!cstval
		  || !ipacp_value_safe_for_type (res_type, cstval))
		break;

	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
					  src_offset, &src_val, j);
	      gcc_checking_assert (src_val);
	    }
	}
      ret |= dest_lat->set_contains_variable ();
    }
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
	/* Now we do not use self-recursively generated values as propagation
	   sources, otherwise it is easy to make the value space of a normal
	   lattice overflow.  */
	if (src_val->self_recursion_generated_p ())
	  {
	    ret |= dest_lat->set_contains_variable ();
	    continue;
	  }

	tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
					       src_val, res_type);
	if (cstval
	    && ipacp_value_safe_for_type (res_type, cstval))
	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
				      src_offset);
	else
	  ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}
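
/* As a simplified illustration of the recursive versioning above (not lifted
   from the sources): given a self-recursive function such as

     int f (int i) { return i > 100 ? 0 : i + f (i + 1); }

   called from elsewhere as f (1), the PLUS_EXPR pass-through jump function on
   the recursive edge generates the chain of values 1, 2, 3, ... up to
   param_ipa_cp_max_recursive_depth in the lattice of f's parameter, each of
   which may later give rise to a specialized clone.  */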
/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
				    ipcp_lattice<tree> *src_lat,
				    ipcp_lattice<tree> *dest_lat, int src_idx,
				    tree parm_type)
{
  return propagate_vals_across_arith_jfunc (cs,
				ipa_get_jf_pass_through_operation (jfunc),
				NULL_TREE,
				ipa_get_jf_pass_through_operand (jfunc),
				src_lat, dest_lat, -1, src_idx, parm_type);
}
/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  */

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx,
				tree param_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t && ipacp_value_safe_for_type (param_type, t))
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (ipacp_value_safe_for_type (param_type, val))
	return dest_lat->add_value (val, cs, NULL, 0);
      else
	return dest_lat->set_contains_variable ();
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx, param_type);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use them for indirect inlining); we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}
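
/* A minimal sketch of scalar propagation (illustrative only, not from the
   sources): for a hypothetical caller

     void g (void) { h (7); }

   the IPA_JF_CONST jump function of the argument adds 7 to the value lattice
   of h's first parameter via add_value above, so that a clone of h
   specialized for that constant can later be considered.  */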
/* Propagate polymorphic contexts across jump function JFUNC that is associated
   with edge CS and describes argument IDX and put the values into DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
			  ipa_jump_func *jfunc, int idx,
			  ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
{
  if (dest_lat->bottom)
    return false;
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}
    }

 prop_fail:
  if (!added_sth)
    {
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
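
/* An illustrative scenario (assumed, not from the sources) for the context
   propagation above:

     struct B { virtual void f (); };
     struct D : B { void f () final; };
     void g (B *b) { h (b); }
     void k () { D d; g (&d); }

   propagating the polymorphic context of g's argument into h can pin the
   dynamic type of h's parameter down to D, which lets a virtual call through
   it inside h be devirtualized later.  */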
/* Propagate bits across jump function JFUNC that is associated with
   edge CS and update DEST_LATTICE accordingly.  */

bool
propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
				     ipa_jump_func *jfunc,
				     ipcp_bits_lattice *dest_lattice)
{
  if (dest_lattice->bottom_p ())
    return false;

  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
  tree parm_type = ipa_get_type (callee_info, idx);

  /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
     transform for these cases.  Similarly, we can have bad type mismatches
     with LTO, avoid doing anything with those too.  */
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;
      bool keep_null = false;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);

      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example consider:

	   int f (int x)
	   {
	     g (x & 0xff);
	   }

	 Assume lattice for x is bottom, however we can still propagate
	 result of x & 0xff == 0xff, which gets computed during ccp1 pass
	 and we store it in jump function during analysis stage.  */
      if (!src_lats->bits_lattice.bottom_p ())
	{
	  bool drop_all_ones
	    = keep_null && !src_lats->bits_lattice.known_nonzero_p ();

	  return dest_lattice->meet_with (src_lats->bits_lattice, precision,
					  sgn, code, operand, drop_all_ones);
	}
    }

  Value_Range vr (parm_type);
  if (jfunc->m_vr)
    {
      jfunc->m_vr->get_vrange (vr);
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  irange &r = as_a <irange> (vr);
	  irange_bitmask bm = r.get_bitmask ();
	  widest_int mask
	    = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
	  widest_int value
	    = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
	  return dest_lattice->meet_with (value, mask, precision);
	}
    }
  return dest_lattice->set_to_bottom ();
}
/* Propagate the value range across jump function JFUNC that is associated
   with edge CS, whose callee parameter has type PARAM_TYPE, and update
   DEST_PLATS accordingly.  */

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ())
	return dest_lat->set_to_bottom ();

      Value_Range vr (operand_type);
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	ipa_vr_operation_and_type_effects (vr,
					   src_lats->m_value_range.m_vr,
					   operation, param_type,
					   operand_type);
      /* A crude way to prevent unbounded number of value range updates
	 in SCC components.  We should allow a limited number of updates
	 within an SCC, too.  */
      else if (!ipa_edge_within_scc (cs))
	{
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  Value_Range op_vr (TREE_TYPE (op));
	  Value_Range op_res (operand_type);
	  range_op_handler handler (operation);

	  ipa_range_set_and_normalize (op_vr, op);

	  if (!handler
	      || !op_res.supports_type_p (operand_type)
	      || !handler.fold_range (op_res, operand_type,
				      src_lats->m_value_range.m_vr, op_vr))
	    op_res.set_varying (operand_type);

	  ipa_vr_operation_and_type_effects (vr,
					     op_res,
					     NOP_EXPR, param_type,
					     operand_type);
	}
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  if (jfunc->m_vr)
	    {
	      Value_Range jvr (param_type);
	      if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
						     NOP_EXPR,
						     param_type,
						     jfunc->m_vr->type ()))
		vr.intersect (jvr);
	    }
	  return dest_lat->meet_with (vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  Value_Range tmpvr (val, val);
	  return dest_lat->meet_with (tmpvr);
	}
    }

  Value_Range vr (param_type);
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (vr);
  else
    return dest_lat->set_to_bottom ();
}
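
/* Illustrative example for the pass-through case above (not from the
   sources): if the caller computes

     void g (int x) { if (x > 0 && x < 10) h (x + 1); }

   the PLUS_EXPR pass-through jump function with constant operand 1 lets the
   [1, 9] range known for x in g be folded into the range [2, 10] for the
   corresponding parameter of h.  */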
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}
/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.  If
   there are overlaps, turn the whole DEST_PLATS to bottom and return false.
   If any skipped lattices were newly marked as containing variable, set
   *CHANGE to true.  MAX_AGG_ITEMS is the maximum number of lattices.  */

static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
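
/* As a rough illustration (not from the sources): the per-parameter
   aggregate lattices form a list sorted by bit offset, so a by-value

     struct S { int a; short b; };

   on a typical 32-bit-int target may end up with lattices at (offset 0,
   size 32) and (offset 32, size 16); the merging step above walks this list
   while keeping it sorted and rejecting any overlapping entries.  */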
/* Set AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}
/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }

  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
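
/* A hedged illustration of OFFSET_DELTA (not lifted from the sources): when
   an ancestor jump function passes &s.inner to a callee expecting only the
   inner member, the delta is the bit offset of that member within s, so a
   caller lattice describing a piece of s at bit offset 96 contributes to the
   callee lattice at offset 96 minus OFFSET_DELTA.  */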
/* Determine whether there is anything to propagate from SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}
/* Propagate values through ITEM, a jump function for a part of an aggregate,
   into the corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */

static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = ipa_node_params_sum->get (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	  return ret;
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
      return ret;
    }

  if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk;
}
/* Propagate constants from the caller to the callee of CS.  INFO describes
   the caller.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = ipa_node_params_sum->get (callee);
  if (!callee_info)
    return false;

  args = ipa_edge_args_sum->get (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
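
/* A brief sketch of why the thunk case above is needed (illustration only):
   a this-adjusting thunk rewrites the 0th argument before the real callee
   sees it, so whatever constant the caller passed no longer describes the
   callee's first parameter, and its lattices have to be dropped to
   "contains variable".  */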
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, and known aggregates in AVS, return the destination.  The
   latter two can be empty.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				const vec<tree> &known_csts,
				const vec<ipa_polymorphic_call_context>
				  &known_contexts,
				const ipa_argagg_value_list &avs,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  if (param_index == -1)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      tree t = NULL;

      if (ie->indirect_info->agg_contents)
	{
	  if ((unsigned) param_index < known_csts.length ()
	      && known_csts[param_index])
	    t = ipa_find_agg_cst_from_init (known_csts[param_index],
					    ie->indirect_info->offset,
					    ie->indirect_info->by_ref);

	  if (!t && ie->indirect_info->guaranteed_unmodified)
	    t = avs.get_value (param_index,
			       ie->indirect_info->offset / BITS_PER_UNIT,
			       ie->indirect_info->by_ref);
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  gcc_assert (!ie->indirect_info->by_ref);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  if ((unsigned) param_index < known_csts.length ()
      && known_csts[param_index])
    t = ipa_find_agg_cst_from_init (known_csts[param_index],
				    ie->indirect_info->offset, true);

  /* Try to work out the value of the virtual table pointer in replacements
     or known aggregate values.  */
  if (!t)
    t = avs.get_value (param_index,
		       ie->indirect_info->offset / BITS_PER_UNIT,
		       true);

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						      vtable, offset,
						      &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of the pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
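
/* An assumed, illustrative scenario for the vtable lookup above (not from
   the sources): if propagation proves that the argument of a polymorphic
   call always points to an object whose virtual table pointer is the
   address of D's vtable plus a fixed offset, the OBJ_TYPE_REF token can be
   mapped directly to D's implementation of the method and the indirect edge
   becomes a direct call.  */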
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean
   determining whether the discovered target is only a speculative guess.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_call_arg_values *avals,
			      bool *speculative)
{
  ipa_argagg_value_list avl (avals);
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avl, speculative);
}
/* Calculate the devirtualization time bonus for NODE, assuming we know
   information about arguments stored in AVALS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      ipa_argagg_value_list avl (avals);
      target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					       avals->m_known_contexts,
					       avl, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}
/* Return the time bonus incurred because of hints stored in ESTIMATES.  */

static sreal
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
  sreal result = 0;
  ipa_hints hints = estimates.hints;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  if (hints & INLINE_HINT_loop_iterations)
    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();

  if (hints & INLINE_HINT_loop_stride)
    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();

  return result;
}
/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline sreal
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       sreal evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
		 / 100;

  return evaluation;
}
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQ_SUM.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
			    sreal freq_sum, profile_count count_sum,
			    int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (count_sum.nonzero_p ())
    {
      gcc_assert (base_count.nonzero_p ());
      sreal factor = count_sum.probability_in (base_count).to_sreal ();
      sreal evaluation = (time_benefit * factor) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		   "size: %i, count_sum: ", time_benefit.to_double (),
		   size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
		   info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation.to_double (), eval_threshold);
	}

      return evaluation.to_int () >= eval_threshold;
    }
  else
    {
      sreal evaluation = (time_benefit * freq_sum) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
		 "threshold: %i\n",
		 time_benefit.to_double (), size_cost, freq_sum.to_double (),
		 info->node_within_scc
		 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation.to_double (), eval_threshold);

      return evaluation.to_int () >= eval_threshold;
    }
}
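
/* Rough worked example with illustrative numbers (not from the sources):
   with time_benefit 50, freq_sum 4 and size_cost 8, the raw ratio is
   50 * 4 / 8 = 25; after the SCC and single-call penalties it is scaled by
   1000 and the result is compared against param_ipa_cp_eval_threshold, so
   cheap clones with large estimated speedups clear the bar easily while
   bulky ones with marginal benefit do not.  */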
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
				   ipa_auto_call_arg_values *avals,
				   bool calculate_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals->m_known_vals[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account a known context as a reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
	ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement cost of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
			       ipa_auto_call_arg_values *avals,
			       int removable_params_cost, int est_move_cost,
			       ipcp_value_base *val)
{
  sreal time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = (estimates.nonspecialized_time - estimates.time)
      + (devirtualization_time_bonus (node, avals)
	 + hint_time_bonus (node, estimates)
	 + removable_params_cost + est_move_cost);

  int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
/* Get the overall limit of growth based on parameters extracted from NODE.
   It does not really make sense to mix functions with different overall growth
   limits but it is possible and if it happens, we do not want to select one
   limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}
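
/* For instance, with illustrative numbers only: given orig_overall_size
   10000, param_ipa_cp_large_unit_insns 16000 and param_ipa_cp_unit_growth 10,
   the base is first raised to 16000 and the returned limit is
   16000 + 16000 * 10 / 100 + 1 = 17601 instructions.  */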
/* Return true if NODE should be cloned just for a parameter removal, possibly
   dumping a reason if not.  */

static bool
clone_for_param_removal_p (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Not considering cloning to remove parameters, "
		 "function cannot change signature.\n");
      return false;
    }
  if (node->can_be_local_p ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Not considering cloning to remove parameters, "
		 "IPA-SRA can do it potentially better.\n");
      return false;
    }
  return true;
}
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  if (always_const || devirt_bonus
      || (removable_params_cost && clone_for_param_removal_p (node)))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, "     Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
					   stats.count_sum, size))
	{
	  if (size + overall_size <= get_max_overall_size (node))
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, "     Decided to specialize for all "
			 "known contexts, growth (to %li) deemed "
			 "beneficial.\n", overall_size);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Not cloning for all contexts because "
		     "maximum unit size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");
    }

  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_vals[i] = NULL_TREE;
    }

  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  unsigned all_ctx_len = avals.m_known_aggs.length ();
  auto_vec<ipa_argagg_value, 32> all_ctx;
  all_ctx.reserve_exact (all_ctx_len);
  all_ctx.splice (avals.m_known_aggs);
  avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);

  unsigned j = 0;
  for (int index = 0; index < count; index++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is already part of all
		 context estimations.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
	  while (j < all_ctx_len
		 && (all_ctx[j].index < index
		     || (all_ctx[j].index == index
			 && all_ctx[j].unit_offset < unit_offset)))
	    {
	      avals.m_known_aggs[j] = all_ctx[j];
	      j++;
	    }

	  for (unsigned k = j; k < all_ctx_len; k++)
	    avals.m_known_aggs[k+1] = all_ctx[k];

	  for (val = aglat->values; val; val = val->next)
	    {
	      avals.m_known_aggs[j].value = val->value;
	      avals.m_known_aggs[j].unit_offset = unit_offset;
	      avals.m_known_aggs[j].index = index;
	      avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
	      avals.m_known_aggs[j].killed = false;

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, index);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %g, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit.to_double (),
			   val->local_size_cost);
		}
	    }
	}
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;
	  v->scc_no = cur_val->dfs;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}
/* One pass of constant propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO); iterates over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
         until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        if (v->has_gimple_body_p ())
          {
            if (opt_for_fn (v->decl, flag_ipa_cp)
                && opt_for_fn (v->decl, optimize))
              push_node_to_stack (topo, v);
            /* When V is not optimized, we cannot push it to the stack, but
               we still need to set all its callees' lattices to bottom.  */
            else
              for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
                propagate_constants_across_call (cs);
          }

      v = pop_node_from_stack (topo);
      while (v)
        {
          struct cgraph_edge *cs;
          class ipa_node_params *info = NULL;
          bool self_scc = true;

          for (cs = v->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs))
              {
                cgraph_node *callee = cs->callee->function_symbol ();

                if (v != callee)
                  self_scc = false;

                if (!info)
                  {
                    info = ipa_node_params_sum->get (v);
                    info->node_within_scc = true;
                  }

                if (propagate_constants_across_call (cs))
                  push_node_to_stack (topo, callee);
              }

          if (info)
            info->node_is_self_scc = self_scc;

          v = pop_node_from_stack (topo);
        }

      /* Afterwards, propagate along edges leading out of the SCC, calculate
         the local effects of the discovered constants and add all valid
         values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        if (v->has_gimple_body_p ()
            && opt_for_fn (v->decl, flag_ipa_cp)
            && opt_for_fn (v->decl, optimize))
          {
            struct cgraph_edge *cs;

            estimate_local_effects (v);
            add_all_node_vals_to_toposort (v, topo);
            for (cs = v->callees; cs; cs = cs->next_callee)
              if (!ipa_edge_within_scc (cs))
                propagate_constants_across_call (cs);
          }
      cycle_nodes.release ();
    }
}
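
/* To illustrate the shape of the iteration above: for an SCC {A, B} where A
   calls B and B calls A, both nodes are pushed to the stack, and whenever
   propagation across an intra-SCC edge changes the callee's lattices, the
   callee is pushed again, so the inner loop only terminates once all
   lattices in the SCC have stabilized.  Lattice heights are finite, so this
   is guaranteed to happen.  */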
/* Propagate the estimated effects of individual values along the topological
   order, from the dependent values to those they depend on.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      for (val = base; val; val = val->scc_next)
        {
          time = time + val->local_time_benefit + val->prop_time_benefit;
          size = size + val->local_size_cost + val->prop_size_cost;
        }

      for (val = base; val; val = val->scc_next)
        {
          processed_srcvals.empty ();
          for (src = val->sources; src; src = src->next)
            if (src->val
                && src->cs->maybe_hot_p ())
              {
                if (!processed_srcvals.add (src->val))
                  {
                    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
                    if (prop_size < INT_MAX)
                      src->val->prop_size_cost = prop_size;
                    else
                      continue;
                  }

                int special_factor = 1;
                if (val->same_scc (src->val))
                  special_factor
                    = opt_for_fn (src->cs->caller->decl,
                                  param_ipa_cp_recursive_freq_factor);
                else if (val->self_recursion_generated_p ()
                         && (src->cs->callee->function_symbol ()
                             == src->cs->caller))
                  {
                    int max_recur_gen_depth
                      = opt_for_fn (src->cs->caller->decl,
                                    param_ipa_cp_max_recursive_depth);
                    special_factor = max_recur_gen_depth
                      - val->self_recursion_generated_level + 1;
                  }

                src->val->prop_time_benefit
                  += time * special_factor * src->cs->sreal_frequency ();
              }

          if (size < INT_MAX)
            {
              val->prop_time_benefit = time;
              val->prop_size_cost = size;
            }
          else
            {
              val->prop_time_benefit = 0;
              val->prop_size_cost = 0;
            }
        }
    }
}
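
/* The net effect of the above is that for every value SCC the sums of the
   local and already-propagated benefits of all its members, i.e.

     time = sum (local_time_benefit + prop_time_benefit)
     size = sum (local_size_cost + prop_size_cost)

   are attributed to the values the members were derived from, with time
   scaled by the frequency of the edge bringing the value (and boosted by
   SPECIAL_FACTOR for recursion-related sources), and are also recorded in
   each member's own prop_* fields for use by the cloning heuristics.  */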
/* Callback for qsort to sort counts of all edges so that the highest count
   comes first.  */

static int
compare_edge_profile_counts (const void *a, const void *b)
{
  const profile_count *cnt1 = (const profile_count *) a;
  const profile_count *cnt2 = (const profile_count *) b;

  if (*cnt1 < *cnt2)
    return 1;
  if (*cnt1 > *cnt2)
    return -1;
  return 0;
}
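
/* Because the comparator above sorts in descending order of count, position
   zero after qsort holds the hottest edge, and the position computed from
   base_count_pos_percent in ipcp_propagate_stage below selects a count that
   a given percentage of hot edges exceed.  */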
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  base_count = profile_count::uninitialized ();

  bool compute_count_base = false;
  unsigned base_count_pos_percent = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    if (node->has_gimple_body_p ()
        && opt_for_fn (node->decl, flag_ipa_cp)
        && opt_for_fn (node->decl, optimize))
      {
        ipa_node_params *info = ipa_node_params_sum->get (node);
        determine_versionability (node, info);

        unsigned nlattices = ipa_get_param_count (info);
        void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
        info->lattices = new (chunk) ipcp_param_lattices[nlattices];
        initialize_node_lattices (node);
      }
    ipa_size_summary *s = ipa_size_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    if (node->count.ipa ().initialized_p ())
      {
        compute_count_base = true;
        unsigned pos_percent = opt_for_fn (node->decl,
                                           param_ipa_cp_profile_count_base);
        base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
      }
  }

  if (compute_count_base)
    {
      auto_vec<profile_count> all_edge_counts;
      all_edge_counts.reserve_exact (symtab->edges_count);
      FOR_EACH_DEFINED_FUNCTION (node)
        for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
          {
            profile_count count = cs->count.ipa ();
            if (!count.nonzero_p ())
              continue;

            enum availability avail;
            cgraph_node *tgt
              = cs->callee->function_or_virtual_thunk_symbol (&avail);
            ipa_node_params *info = ipa_node_params_sum->get (tgt);
            if (info && info->versionable)
              all_edge_counts.quick_push (count);
          }

      if (!all_edge_counts.is_empty ())
        {
          gcc_assert (base_count_pos_percent <= 100);
          all_edge_counts.qsort (compare_edge_profile_counts);

          unsigned base_count_pos
            = (all_edge_counts.length () * base_count_pos_percent) / 100;
          base_count = all_edge_counts[base_count_pos];

          if (dump_file)
            {
              fprintf (dump_file, "\nSelected base_count from %u edges at "
                       "position %u, arriving at: ", all_edge_counts.length (),
                       base_count_pos);
              base_count.dump (dump_file);
              fprintf (dump_file, "\n");
            }
        }
      else if (dump_file)
        fprintf (dump_file, "\nNo candidates with non-zero call count found, "
                 "continuing as if without profile feedback.\n");
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
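
/* An example of the base_count selection above, assuming
   param_ipa_cp_profile_count_base is a percentage such as 10: if the
   non-zero IPA counts of edges leading to versionable functions, sorted in
   descending order, are {1000, 800, 100, 50, ...} with 40 entries in total,
   base_count_pos is 40 * 10 / 100 == 4 and base_count becomes the count of
   the fifth hottest edge.  Later heuristics compare candidate counts
   against this base.  */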
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
                                vec<tree> known_csts,
                                vec<ipa_polymorphic_call_context>
                                known_contexts,
                                vec<ipa_argagg_value, va_gc> *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      next_ie = ie->next_callee;
      ipa_argagg_value_list avs (aggvals);
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
                                               avs, &speculative);
      if (target)
        {
          bool agg_contents = ie->indirect_info->agg_contents;
          bool polymorphic = ie->indirect_info->polymorphic;
          int param_index = ie->indirect_info->param_index;
          struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
                                                                   speculative);
          found = true;

          if (cs && !agg_contents && !polymorphic)
            {
              ipa_node_params *info = ipa_node_params_sum->get (node);
              int c = ipa_get_controlled_uses (info, param_index);
              if (c != IPA_UNDESCRIBED_USE
                  && !ipa_get_param_load_dereferenced (info, param_index))
                {
                  struct ipa_ref *to_del;

                  c--;
                  ipa_set_controlled_uses (info, param_index, c);
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "     controlled uses count of param "
                             "%i bumped down to %i\n", param_index, c);
                  if (c == 0
                      && (to_del = node->find_reference (cs->callee, NULL, 0,
                                                         IPA_REF_ADDR)))
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file, "       and even removing its "
                                 "cloning-created reference\n");
                      to_del->remove_reference ();
                    }
                }
            }
        }
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  */

class edge_clone_summary
{
public:
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    m_initialize_when_cloning = true;
  }

  void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
                  edge_clone_summary *src_data,
                  edge_clone_summary *dst_data) final override;
};

/* Edge duplication hook.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
                                 edge_clone_summary *src_data,
                                 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}
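
/* The hooks above maintain, for every original call graph edge, a
   doubly-linked chain threaded through edge_clone_summaries.  For example,
   when edge E is duplicated to E2 and later E2 to E3, the chain is
   E -> E2 -> E3 (a new clone is always inserted right after the edge it was
   cloned from), so get_next_cgraph_edge_clone below can enumerate all clones
   starting from the original edge.  */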
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */

static bool
calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
                                             bool allow_recursion_to_clone)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);

  if (availability <= AVAIL_INTERPOSABLE)
    return false;
  if (callee == dest)
    return true;
  if (!allow_recursion_to_clone && cs->caller == callee)
    return false;

  ipa_node_params *info = ipa_node_params_sum->get (callee);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
                            cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      tree t = NULL_TREE;
      if (src->offset == -1)
        t = caller_info->known_csts[src->index];
      else if (ipcp_transformation *ts
               = ipcp_get_transformation_summary (cs->caller))
        {
          ipa_argagg_value_list avl (ts);
          t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
        }
      return (t != NULL_TREE
              && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
        return true;

      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
                                                                src->index);
      if (src->offset == -1)
        return (plats->itself.is_single_const ()
                && values_equal_for_ipcp_p (src->val->value,
                                            plats->itself.values->value));
      else
        {
          if (plats->aggs_bottom || plats->aggs_contain_variable)
            return false;
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            if (aglat->offset == src->offset)
              return (aglat->is_single_const ()
                      && values_equal_for_ipcp_p (src->val->value,
                                                  aglat->values->value));
        }
      return false;
    }
}
/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
                            ipcp_value_source<ipa_polymorphic_call_context> *src,
                            cgraph_node *dest,
                            ipcp_value<ipa_polymorphic_call_context> *)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
                                  caller_info->known_contexts[src->index]);

  class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
                                                            src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
                                plats->ctxlat.values->value);
}
/* Get the next clone in the linked list of clones of an edge.  */

static inline struct cgraph_edge *
get_next_cgraph_edge_clone (struct cgraph_edge *cs)
{
  edge_clone_summary *s = edge_clone_summaries->get (cs);
  return s != NULL ? s->next_clone : NULL;
}
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequency and their number and the cumulative profile
   counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
   *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */

template <typename valtype>
static bool
get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
                                sreal *freq_sum, int *caller_count,
                                profile_count *rec_count_sum,
                                profile_count *nonrec_count_sum)
{
  ipcp_value_source<valtype> *src;
  sreal freq = 0;
  int count = 0;
  profile_count rec_cnt = profile_count::zero ();
  profile_count nonrec_cnt = profile_count::zero ();
  bool hot = false;
  bool non_self_recursive = false;

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
        {
          if (cgraph_edge_brings_value_p (cs, src, dest, val))
            {
              count++;
              freq += cs->sreal_frequency ();
              hot |= cs->maybe_hot_p ();
              if (cs->caller != dest)
                {
                  non_self_recursive = true;
                  if (cs->count.ipa ().initialized_p ())
                    nonrec_cnt += cs->count.ipa ();
                }
              else if (cs->count.ipa ().initialized_p ())
                rec_cnt += cs->count.ipa ();
            }
          cs = get_next_cgraph_edge_clone (cs);
        }
    }

  /* If the only edges bringing a value are self-recursive ones, do not bother
     evaluating it.  */
  if (!non_self_recursive)
    return false;

  *freq_sum = freq;
  *caller_count = count;
  *rec_count_sum = rec_cnt;
  *nonrec_count_sum = nonrec_cnt;

  if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
    {
      struct cgraph_edge *cs;

      /* A cold non-SCC source edge could trigger hot recursive execution of
         the function.  Consider the case as hot and rely on the following
         cost model computation to further select the right one.  */
      for (cs = dest->callers; cs; cs = cs->next_caller)
        if (cs->caller == dest && cs->maybe_hot_p ())
          {
            hot = true;
            break;
          }
    }

  return hot;
}
/* Given a NODE, and a set of its CALLERS, try to adjust the order of the
   callers to let a non-self-recursive caller be the first element.  Thus, we
   can simplify intersecting operations on values that arrive from all of
   these callers, especially when there exists a self-recursive call.  Return
   true if this kind of adjustment is possible.  */

static bool
adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
                                       cgraph_node *node)
{
  for (unsigned i = 0; i < callers.length (); i++)
    {
      cgraph_edge *cs = callers[i];

      if (cs->caller != node)
        {
          if (i > 0)
            {
              callers[i] = callers[0];
              callers[0] = cs;
            }
          return true;
        }
    }
  return false;
}
/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
   is assumed their number is known and equal to CALLER_COUNT.  */

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
                        int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
        {
          if (cgraph_edge_brings_value_p (cs, src, dest, val))
            ret.quick_push (cs);
          cs = get_next_cgraph_edge_clone (cs);
        }
    }

  if (caller_count > 1)
    adjust_callers_for_value_intersection (ret, dest);

  return ret;
}
/* Construct a replacement map for a known VALUE for a formal parameter
   PARM_NUM.  Return it or NULL if for some reason it cannot be created.
   FORCE_LOAD_REF should be set to true when the reference created for the
   constant should be a load one and not an address one because the
   corresponding parameter p is only used as *p.  */

static struct ipa_replace_map *
get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
                     bool force_load_ref)
{
  struct ipa_replace_map *replace_map;

  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, "    replacing ");
      ipa_dump_param (dump_file, info, parm_num);

      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);

      if (force_load_ref)
        fprintf (dump_file, " - forcing load reference\n");
      else
        fprintf (dump_file, "\n");
    }
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  replace_map->force_load_ref = force_load_ref;
  return replace_map;
}
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
   one, otherwise it will be referred to as the original node.  */

static void
dump_profile_updates (cgraph_node *node, bool spec)
{
  if (spec)
    fprintf (dump_file, "     setting count of the specialized node %s to ",
             node->dump_name ());
  else
    fprintf (dump_file, "     setting count of the original node %s to ",
             node->dump_name ());

  node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "       edge to %s has count ",
               cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}
/* With a partial training run we do not want to assume that the original's
   count is zero whenever we redirect all executed edges to a clone.  Simply
   drop the profile to a local one in this case.  In any case, return the new
   value.  ORIG_NODE is the original node and its count has not been updated
   yet.  */

static profile_count
lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
{
  if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
    remainder = remainder.guessed_local ();

  return remainder;
}
/* Structure to sum counts coming from nodes other than the original node and
   its clones.  */

struct gather_other_count_struct
{
  cgraph_node *orig;
  profile_count other_count;
};

/* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
   counts that come from non-self-recursive calls.  */

static bool
gather_count_of_non_rec_edges (cgraph_node *node, void *data)
{
  gather_other_count_struct *desc = (gather_other_count_struct *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
      desc->other_count += cs->count.ipa ();
  return false;
}
/* Structure to help analyze if we need to boost counts of some clones of some
   non-recursive edges to match the new callee count.  */

struct desc_incoming_count_struct
{
  cgraph_node *orig;
  hash_set <cgraph_edge *> *processed_edges;
  profile_count count;
  unsigned unproc_orig_rec_edges;
};

/* Go over edges calling NODE and its thunks and gather information about
   incoming counts so that we know if we need to make any adjustments.  */

static void
analyze_clone_icoming_counts (cgraph_node *node,
                              desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
        analyze_clone_icoming_counts (cs->caller, desc);
        continue;
      }
    else
      {
        if (cs->count.initialized_p ())
          desc->count += cs->count.ipa ();
        if (!desc->processed_edges->contains (cs)
            && cs->caller->clone_of == desc->orig)
          desc->unproc_orig_rec_edges++;
      }
}
/* If caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because they come from the "seed" clone for
   the first value and so have been excessively scaled back as if they were
   not recursive calls, adjust them so that the incoming counts of NODE match
   its count.  NODE is the node or its thunk.  */

static void
adjust_clone_incoming_counts (cgraph_node *node,
                              desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
        adjust_clone_incoming_counts (cs->caller, desc);
        profile_count sum = profile_count::zero ();
        for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
          if (e->count.initialized_p ())
            sum += e->count.ipa ();
        cs->count = cs->count.combine_with_ipa_count (sum);
      }
    else if (!desc->processed_edges->contains (cs)
             && cs->caller->clone_of == desc->orig)
      {
        cs->count += desc->count;
        if (dump_file)
          {
            fprintf (dump_file, "       Adjusted count of an incoming edge of "
                     "a clone %s -> %s to ", cs->caller->dump_name (),
                     cs->callee->dump_name ());
            cs->count.dump (dump_file);
            fprintf (dump_file, "\n");
          }
      }
}
/* When ORIG_NODE has been cloned for values which have been generated for a
   self-recursive call as a result of arithmetic pass-through jump functions,
   adjust its count together with counts of all such clones in
   SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.

   The function sums the counts of the original node and all its clones that
   cannot be attributed to a specific clone because they come from
   non-recursive edges.  This sum is then evenly divided between the clones
   and on top of that each one gets all the counts which can be attributed
   directly to it.  */

static void
update_counts_for_self_gen_clones (cgraph_node *orig_node,
                                   const vec<cgraph_node *> &self_gen_clones)
{
  profile_count redist_sum = orig_node->count.ipa ();
  if (!(redist_sum > profile_count::zero ()))
    return;

  if (dump_file)
    fprintf (dump_file, "     Updating profile of self recursive clone "
             "series\n");

  gather_other_count_struct gocs;
  gocs.orig = orig_node;
  gocs.other_count = profile_count::zero ();

  auto_vec <profile_count, 8> other_edges_count;
  for (cgraph_node *n : self_gen_clones)
    {
      gocs.other_count = profile_count::zero ();
      n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
                                             &gocs, false);
      other_edges_count.safe_push (gocs.other_count);
      redist_sum -= gocs.other_count;
    }

  hash_set<cgraph_edge *> processed_edges;
  unsigned i = 0;
  for (cgraph_node *n : self_gen_clones)
    {
      profile_count orig_count = n->count;
      profile_count new_count
        = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
      new_count = lenient_count_portion_handling (new_count, orig_node);
      n->count = new_count;
      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
      for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
        {
          cs->count = cs->count.apply_scale (new_count, orig_count);
          processed_edges.add (cs);
        }
      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
        cs->count = cs->count.apply_scale (new_count, orig_count);

      i++;
    }

  /* There are still going to be edges to ORIG_NODE that have one or more
     clones coming from another node clone in SELF_GEN_CLONES and which we
     scaled by the same amount, which means that the total incoming sum of
     counts to ORIG_NODE will be too high, scale such edges back.  */
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      if (cs->callee->ultimate_alias_target () == orig_node)
        {
          unsigned den = 0;
          for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
            if (e->callee->ultimate_alias_target () == orig_node
                && processed_edges.contains (e))
              den++;
          if (den > 0)
            for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
              if (e->callee->ultimate_alias_target () == orig_node
                  && processed_edges.contains (e))
                e->count /= den;
        }
    }

  /* Edges from the seeds of the values generated for arithmetic
     jump-functions along self-recursive edges are likely to have fairly low
     counts and so edges from them to nodes in the self_gen_clones do not
     correspond to the artificially distributed count of the nodes, the total
     sum of incoming edges to some clones might be too low.  Detect this
     situation and correct it.  */
  for (cgraph_node *n : self_gen_clones)
    {
      if (!(n->count.ipa () > profile_count::zero ()))
        continue;

      desc_incoming_count_struct desc;
      desc.orig = orig_node;
      desc.processed_edges = &processed_edges;
      desc.count = profile_count::zero ();
      desc.unproc_orig_rec_edges = 0;
      analyze_clone_icoming_counts (n, &desc);

      if (n->count.differs_from_p (desc.count))
        {
          if (n->count > desc.count
              && desc.unproc_orig_rec_edges > 0)
            {
              desc.count = n->count - desc.count;
              desc.count /= desc.unproc_orig_rec_edges;
              adjust_clone_incoming_counts (n, &desc);
            }
          else if (dump_file)
            fprintf (dump_file,
                     "       Unable to fix up incoming counts for %s.\n",
                     n->dump_name ());
        }
    }

  if (dump_file)
    for (cgraph_node *n : self_gen_clones)
      dump_profile_updates (n, n != orig_node);
}
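
/* A small worked example of the redistribution above: assume ORIG_NODE has
   an IPA count of 900 and SELF_GEN_CLONES contains ORIG_NODE and two clones,
   with counts attributable to incoming non-recursive edges of 0, 60 and 30
   respectively.  Then redist_sum is 900 - 90 = 810 and the three nodes are
   assigned counts of 270 + 0, 270 + 60 and 270 + 30, after which their
   outgoing edge counts are scaled accordingly and incoming edge counts are
   fixed up as described in the comments above.  */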
/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  This function should not be
   used for clones generated for arithmetic pass-through jump functions on a
   self-recursive call graph edge, that situation is handled by
   update_counts_for_self_gen_clones.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
                       struct cgraph_node *new_node)
{
  struct caller_statistics stats;
  profile_count new_sum;
  profile_count remainder, orig_node_count = orig_node->count.ipa ();

  if (!(orig_node_count > profile_count::zero ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "     Updating profile from original count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  init_caller_stats (&stats, new_node);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
                                                false);
  new_sum = stats.count_sum;

  bool orig_edges_processed = false;
  if (new_sum > orig_node_count)
    {
      /* TODO: Profile has already gone astray, keep what we have but lower it
         to the global0 category.  */
      remainder = orig_node->count.global0 ();

      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
        cs->count = cs->count.global0 ();
      for (cgraph_edge *cs = orig_node->indirect_calls;
           cs;
           cs = cs->next_callee)
        cs->count = cs->count.global0 ();
      orig_edges_processed = true;
    }
  else if (stats.rec_count_sum.nonzero_p ())
    {
      int new_nonrec_calls = stats.n_nonrec_calls;
      /* There are self-recursive edges which are likely to bring in the
         majority of calls but which we must divide in between the original
         and new node.  */
      init_caller_stats (&stats, orig_node);
      orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
                                                     &stats, false);
      int orig_nonrec_calls = stats.n_nonrec_calls;
      profile_count orig_nonrec_call_count = stats.count_sum;

      if (orig_node->local)
        {
          if (!orig_nonrec_call_count.nonzero_p ())
            {
              if (dump_file)
                fprintf (dump_file, "       The original is local and the only "
                         "incoming edges from non-dead callers with nonzero "
                         "counts are self-recursive, assuming it is cold.\n");
              /* The NEW_NODE count and counts of all its outgoing edges
                 are still unmodified copies of ORIG_NODE's.  Just clear
                 the latter and bail out.  */
              profile_count zero;
              if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
                zero = profile_count::zero ().guessed_local ();
              else
                zero = profile_count::adjusted_zero ();
              orig_node->count = zero;
              for (cgraph_edge *cs = orig_node->callees;
                   cs;
                   cs = cs->next_callee)
                cs->count = zero;
              for (cgraph_edge *cs = orig_node->indirect_calls;
                   cs;
                   cs = cs->next_callee)
                cs->count = zero;
              return;
            }
        }
      else
        {
          /* Let's behave as if there was another caller that accounts for all
             the calls that were either indirect or from other compilation
             units.  */
          orig_nonrec_calls++;
          profile_count pretend_caller_count
            = (orig_node_count - new_sum - orig_nonrec_call_count
               - stats.rec_count_sum);
          orig_nonrec_call_count += pretend_caller_count;
        }

      /* Divide all "unexplained" counts roughly proportionally to sums of
         counts of non-recursive calls.

         We put rather arbitrary limits on how many counts we claim because
         the number of non-self-recursive incoming counts is only a rough
         guideline and there are cases (such as mcf) where using it blindly
         just takes too many.  And if lattices are considered in the opposite
         order we could also take too few.  */
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
      profile_count new_part
        = MAX (MIN (unexp.apply_scale (new_sum,
                                       new_sum + orig_nonrec_call_count),
                    unexp.apply_scale (limit_den - 1, limit_den)),
               unexp.apply_scale (new_nonrec_calls, limit_den));

      if (dump_file)
        {
          fprintf (dump_file, "       Claiming ");
          new_part.dump (dump_file);
          fprintf (dump_file, " of unexplained ");
          unexp.dump (dump_file);
          fprintf (dump_file, " counts because of self-recursive "
                   "calls\n");
        }

      new_sum += new_part;
      remainder = lenient_count_portion_handling (orig_node_count - new_sum,
                                                  orig_node);
    }
  else
    remainder = lenient_count_portion_handling (orig_node_count - new_sum,
                                                orig_node);

  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  new_node->count = new_sum;
  orig_node->count = remainder;

  profile_count orig_new_node_count = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);

  if (!orig_edges_processed)
    {
      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
        cs->count = cs->count.apply_scale (remainder, orig_node_count);
      for (cgraph_edge *cs = orig_node->indirect_calls;
           cs;
           cs = cs->next_callee)
        cs->count = cs->count.apply_scale (remainder, orig_node_count);
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
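
/* To make the "unexplained" count division above concrete: with
   orig_node_count = 1000, new_sum = 200 and orig_nonrec_call_count = 300,
   unexp is 500 and the proportional share claimed for the new node is
   500 * 200 / (200 + 300) = 200, subject to the limit_den clamping.  Thus
   new_sum grows to 400 and the remainder of 600 stays with the original
   node (modulo lenient_count_portion_handling).  */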
/* Update the respective profiles of the specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
                            struct cgraph_node *orig_node,
                            profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();

  if (dump_file)
    {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n    old ipa count of the original node is ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count
    = lenient_count_portion_handling (orig_node->count - redirected_sum,
                                      orig_node);

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
                                                 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
static void adjust_references_in_caller (cgraph_edge *cs,
                                         symtab_node *symbol, int index);

/* Simple structure to pass a symbol and index (with same meaning as
   parameters of adjust_references_in_caller) through a void* parameter of a
   call_for_symbol_thunks_and_aliases callback.  */
struct symbol_and_index_together
{
  symtab_node *symbol;
  int index;
};

/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
   adjust_references_in_caller on edges up in the call-graph, if necessary.  */
static bool
adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
{
  symbol_and_index_together *pack = (symbol_and_index_together *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      adjust_references_in_caller (cs, pack->symbol, pack->index);
  return false;
}

/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
   variable which is only dereferenced and which is represented by SYMBOL.
   See if we can remove the ADDR reference in callers associated with the
   call.  */

static void
adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
  if (jfunc->type == IPA_JF_CONST)
    {
      ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
                                                    cs->lto_stmt_uid,
                                                    IPA_REF_ADDR);
      if (!to_del)
        return;
      to_del->remove_reference ();
      ipa_zap_jf_refdesc (jfunc);
      if (dump_file)
        fprintf (dump_file, "    Removed a reference from %s to %s.\n",
                 cs->caller->dump_name (), symbol->dump_name ());
      return;
    }

  if (jfunc->type != IPA_JF_PASS_THROUGH
      || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
      || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
    return;

  int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
  cgraph_node *caller = cs->caller;
  ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
  /* TODO: This consistency check may be too big and not really
     that useful.  Consider removing it.  */
  tree cst;
  if (caller_info->ipcp_orig_node)
    cst = caller_info->known_csts[fidx];
  else
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
      gcc_assert (lat->is_single_const ());
      cst = lat->values->value;
    }
  gcc_assert (TREE_CODE (cst) == ADDR_EXPR
              && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
                  == symbol));

  int cuses = ipa_get_controlled_uses (caller_info, fidx);
  if (cuses == IPA_UNDESCRIBED_USE)
    return;
  gcc_assert (cuses > 0);
  cuses--;
  ipa_set_controlled_uses (caller_info, fidx, cuses);
  ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "    Controlled uses of parameter %i of %s dropped "
             "to %i.\n", fidx, caller->dump_name (), cuses);
  if (cuses)
    return;

  if (caller_info->ipcp_orig_node)
    {
      /* Cloning machinery has created a reference here, we need to either
         remove it or change it to a read one.  */
      ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
      if (to_del)
        {
          to_del->remove_reference ();
          if (dump_file)
            fprintf (dump_file, "    Removed a reference from %s to %s.\n",
                     cs->caller->dump_name (), symbol->dump_name ());
          if (ipa_get_param_load_dereferenced (caller_info, fidx))
            {
              caller->create_reference (symbol, IPA_REF_LOAD, NULL);
              if (dump_file)
                fprintf (dump_file,
                         "      ...and replaced it with LOAD one.\n");
            }
        }
    }

  symbol_and_index_together pack;
  pack.symbol = symbol;
  pack.index = fidx;
  if (caller->can_change_signature)
    caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
                                                &pack, true);
}
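
/* The recursion above thus walks pass-through jump functions up the call
   graph: if caller C only passes its own parameter P to the function that no
   longer needs the address of SYMBOL, C's controlled-uses count for P is
   decremented, and only when it drops to zero are C's own callers visited in
   turn and the IPA_REF_ADDR references removed (or downgraded to
   IPA_REF_LOAD when the parameter is also dereferenced).  */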
/* Return true if we would like to remove a parameter from NODE when cloning
   it with KNOWN_CSTS scalar constants.  */

static bool
want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
{
  auto_vec<bool, 16> surviving;
  bool filled_vec = false;
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      if (!known_csts[i] && ipa_is_param_used (info, i))
        continue;

      if (!filled_vec)
        {
          clone_info *info = clone_info::get (node);
          if (!info || !info->param_adjustments)
            return true;
          info->param_adjustments->get_surviving_params (&surviving);
          filled_vec = true;
        }
      if (surviving.length () > (unsigned) i && surviving[i])
        return true;
    }
  return false;
}
/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
                         vec<tree> known_csts,
                         vec<ipa_polymorphic_call_context> known_contexts,
                         vec<ipa_argagg_value, va_gc> *aggvals,
                         vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
                                           ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
              || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
                  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
        {
          ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
          if (!node->can_change_signature
              || old_adj->op != IPA_PARAM_OP_COPY
              || (!known_csts[old_adj->base_index]
                  && ipa_is_param_used (info, old_adj->base_index)))
            {
              ipa_adjusted_param new_adj = *old_adj;

              new_adj.prev_clone_adjustment = true;
              new_adj.prev_clone_index = i;
              vec_safe_push (new_params, new_adj);
            }
        }
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
                         ipa_param_adjustments (new_params, count,
                                                skip_return));
    }
  else if (node->can_change_signature
           && want_remove_some_param_p (node, known_csts))
    {
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
        if (!known_csts[i] && ipa_is_param_used (info, i))
          {
            adj.base_index = i;
            adj.prev_clone_index = i;
            vec_safe_push (new_params, adj);
          }
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
                         ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
        {
          self_recursive_calls.safe_push (cs);
          callers.unordered_remove (i);
        }
    }
  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
        continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
        {
          tree base = get_base_address (TREE_OPERAND (t, 0));
          if (TREE_CODE (base) == VAR_DECL
              && ipa_get_controlled_uses (info, i) == 0
              && ipa_get_param_load_dereferenced (info, i)
              && (ref_symbol = symtab_node::get (base)))
            {
              load_ref = true;
              if (node->can_change_signature)
                for (cgraph_edge *caller : callers)
                  adjust_references_in_caller (caller, ref_symbol, i);
            }
        }

      ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
      if (replace_map)
        vec_safe_push (replace_trees, replace_map);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
                               IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
                                 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
                                         new_adjustments, "constprop",
                                         suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
         resolved, check that we have one and that it comes from the last
         cloning.  */
      if (cs && cs->caller == new_node)
        cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
         edge would confuse this mechanism, so let's check that does not
         happen.  */
      gcc_checking_assert (!cs
                           || !get_next_cgraph_edge_clone (cs)
                           || get_next_cgraph_edge_clone (cs)->caller
                              != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (const ipa_argagg_value &av : aggvals)
    new_node->maybe_create_reference (av.value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
        {
          for (i = 0; i < count; i++)
            if (!known_contexts[i].useless_p ())
              {
                fprintf (dump_file, "     known ctx %i is ", i);
                known_contexts[i].dump (dump_file);
              }
        }
      if (aggvals)
        {
          fprintf (dump_file, "     Aggregate replacements:");
          ipa_argagg_value_list avs (aggvals);
          avs.dump (dump_file);
        }
    }

  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
                                  aggvals);

  return new_node;
}
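
/* In outline, the function above (1) computes parameter adjustments so that
   parameters which are known constants and not otherwise used can be dropped
   from the clone's signature, (2) separates self-recursive edges from
   CALLERS so that they can be redirected to the new node itself, (3) builds
   replacement maps turning known scalar constants into assignments in the
   clone, and (4) records the known values in the new node's ipa_node_params
   so that ipcp_discover_new_direct_edges and the transformation phase can
   use them.  */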
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
                               bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
/* Return true if JFUNC, which describes a part of an aggregate represented
   or pointed to by the i-th parameter of call CS, is a pass-through function
   to itself when the cgraph_node involved is not an IPA-CP clone.  When
   SIMPLE is true, further check if JFUNC is a simple no-operation
   pass-through.  */

static bool
self_recursive_agg_pass_through_p (const cgraph_edge *cs,
                                   const ipa_agg_jf_item *jfunc,
                                   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
                                            vec<tree> &known_csts,
                                            const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
        continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
        {
          struct ipa_jump_func *jump_func;
          tree t;

          ipa_edge_args *args = ipa_edge_args_sum->get (cs);
          if (!args
              || i >= ipa_get_cs_argument_count (args)
              || (i == 0
                  && call_passes_through_thunk (cs)))
            {
              newval = NULL_TREE;
              break;
            }
          jump_func = ipa_get_ith_jump_func (args, i);

          /* Besides a simple pass-through jump function, an arithmetic jump
             function could also introduce argument-direct-pass-through for a
             self-feeding recursive call.  For example,

                fn (int i)
                {
                  int f = fn (i & 1);
                  ...
                }

             Given that i is 0, recursive propagation via (i & 1) also gets
             0.  */
          if (self_recursive_pass_through_p (cs, jump_func, i, false))
            {
              gcc_assert (newval);
              t = ipa_get_jf_arith_result (
                        ipa_get_jf_pass_through_operation (jump_func),
                        newval,
                        ipa_get_jf_pass_through_operand (jump_func),
                        type);
            }
          else
            t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
                                      jump_func, type);
          if (!t
              || (newval
                  && !values_equal_for_ipcp_p (t, newval))
              || (!first && !newval))
            {
              newval = NULL_TREE;
              break;
            }
          else
            newval = t;
          first = false;
        }

      if (newval)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "    adding an extra known scalar value ");
              print_ipcp_constant_value (dump_file, newval);
              fprintf (dump_file, " for ");
              ipa_dump_param (dump_file, info, i);
              fprintf (dump_file, "\n");
            }

          known_csts[i] = newval;
        }
    }
}
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of
   the CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
                                      vec<ipa_polymorphic_call_context>
                                      *known_contexts,
                                      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
          || (known_contexts->exists ()
              && !(*known_contexts)[i].useless_p ()))
        continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
        {
          ipa_edge_args *args = ipa_edge_args_sum->get (cs);
          if (!args
              || i >= ipa_get_cs_argument_count (args))
            return;
          ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
          ipa_polymorphic_call_context ctx;
          ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
                                        cs, i, jfunc);
          if (first)
            {
              newval = ctx;
              first = false;
            }
          else
            newval.meet_with (ctx);
          if (newval.useless_p ())
            break;
        }

      if (!newval.useless_p ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "    adding an extra known polymorphic "
                       "context ");
              print_ipcp_constant_value (dump_file, newval);
              fprintf (dump_file, " for ");
              ipa_dump_param (dump_file, info, i);
              fprintf (dump_file, "\n");
            }

          if (!known_contexts->exists ())
            known_contexts->safe_grow_cleared (ipa_get_param_count (info),
                                               true);
          (*known_contexts)[i] = newval;
        }
    }
}
/* Push all aggregate values coming along edge CS for parameter number INDEX
   to RES.  If INTERIM is non-NULL, it contains the current interim state of
   collected aggregate values which can be used to compute values passed over
   self-recursive edges.

   This is basically one iteration of push_agg_values_from_edge over one
   parameter, which allows for simpler early returns.  */

static void
push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
                                     vec<ipa_argagg_value> *res,
                                     const ipa_argagg_value_list *interim)
{
  bool agg_values_from_caller = false;
  bool agg_jf_preserved = false;
  unsigned unit_delta = UINT_MAX;
  int src_idx = -1;
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
                                                index);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      agg_values_from_caller = true;
      agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
      src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      unit_delta = 0;
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
           && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      agg_values_from_caller = true;
      agg_jf_preserved = true;
      src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
    }

  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
  if (agg_values_from_caller)
    {
      if (caller_info->ipcp_orig_node)
        {
          struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
          ipcp_transformation *ts
            = ipcp_get_transformation_summary (cs->caller);
          ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
          ipcp_param_lattices *orig_plats
            = ipa_get_parm_lattices (orig_info, src_idx);
          if (ts
              && orig_plats->aggs
              && (agg_jf_preserved || !orig_plats->aggs_by_ref))
            {
              ipa_argagg_value_list src (ts);
              src.push_adjusted_values (src_idx, index, unit_delta, res);
              return;
            }
        }
      else
        {
          ipcp_param_lattices *src_plats
            = ipa_get_parm_lattices (caller_info, src_idx);
          if (src_plats->aggs
              && !src_plats->aggs_bottom
              && (agg_jf_preserved || !src_plats->aggs_by_ref))
            {
              if (interim && self_recursive_pass_through_p (cs, jfunc, index))
                {
                  interim->push_adjusted_values (src_idx, index, unit_delta,
                                                 res);
                  return;
                }
              if (!src_plats->aggs_contain_variable)
                {
                  push_agg_values_from_plats (src_plats, index, unit_delta,
                                              res);
                  return;
                }
            }
        }
    }

  if (!jfunc->agg.items)
    return;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
    {
      tree value, srcvalue;
      /* Besides a simple pass-through aggregate jump function, an arithmetic
         aggregate jump function could also bring the same aggregate value as
         the parameter passed-in for a self-feeding recursive call.  For
         example,

            fn (int *i)
            {
              int j = *i & 1;
              fn (&j);
            }

         Given that *i is 0, recursive propagation via (*i & 1) also gets
         0.  */
      if (interim
          && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
          && (srcvalue = interim->get_value (index,
                                             agg_jf.offset / BITS_PER_UNIT)))
        value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
                                         srcvalue,
                                         agg_jf.value.pass_through.operand,
                                         agg_jf.type);
      else
        value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
                                          &agg_jf);
      if (value)
        {
          struct ipa_argagg_value iav;
          iav.value = value;
          iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
          iav.index = index;
          iav.by_ref = jfunc->agg.by_ref;
          iav.killed = false;

          gcc_assert (first
                      || iav.unit_offset > prev_unit_offset);
          prev_unit_offset = iav.unit_offset;
          first = false;

          res->safe_push (iav);
        }
    }
}
/* Push all aggregate values coming along edge CS to RES.  DEST_INFO is the
   description of the ultimate callee of CS or the one it was cloned from
   (the summary where the lattices are).  If INTERIM is non-NULL, it contains
   the current interim state of collected aggregate values which can be used
   to compute values passed over self-recursive edges (if
   OPTIMIZE_SELF_RECURSION is true) and to skip values which clearly will not
   be part of the intersection with INTERIM.  */

static void
push_agg_values_from_edge (struct cgraph_edge *cs,
                           ipa_node_params *dest_info,
                           vec<ipa_argagg_value> *res,
                           const ipa_argagg_value_list *interim,
                           bool optimize_self_recursion)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  if (!args)
    return;

  int count = MIN (ipa_get_param_count (dest_info),
                   ipa_get_cs_argument_count (args));

  unsigned interim_index = 0;
  for (int index = 0; index < count; index++)
    {
      if (interim)
        {
          while (interim_index < interim->m_elts.size ()
                 && interim->m_elts[interim_index].value
                 && interim->m_elts[interim_index].index < index)
            interim_index++;
          if (interim_index >= interim->m_elts.size ()
              || interim->m_elts[interim_index].index > index)
            continue;
        }

      ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
      if (!ipa_is_param_used (dest_info, index)
          || plats->aggs_bottom)
        continue;
      push_agg_values_for_index_from_edge (cs, index, res,
                                           optimize_self_recursion ? interim
                                           : NULL);
    }
}
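
/* Note that when INTERIM is provided, the loop above skips parameter indices
   for which INTERIM holds no value at all, because an intersection with an
   empty set stays empty no matter what values edge CS would contribute for
   that index.  */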
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  Return nullptr if there are none.  */

static struct vec<ipa_argagg_value, va_gc> *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
                                          const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  if (dest_info->ipcp_orig_node)
    dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);

  /* gather_edges_for_value puts a non-recursive call into the first element
     of callers if it can.  */
  auto_vec<ipa_argagg_value, 32> interim;
  push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);

  unsigned valid_entries = interim.length ();
  if (!valid_entries)
    return NULL;

  unsigned caller_count = callers.length ();
  for (unsigned i = 1; i < caller_count; i++)
    {
      auto_vec<ipa_argagg_value, 32> last;
      ipa_argagg_value_list avs (&interim);
      push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);

      valid_entries = intersect_argaggs_with (interim, last);
      if (!valid_entries)
        return NULL;
    }

  vec<ipa_argagg_value, va_gc> *res = NULL;
  vec_safe_reserve_exact (res, valid_entries);
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res->quick_push (av);
  gcc_checking_assert (res->length () == valid_entries);
  return res;
}
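
/* For instance, if one caller passes an aggregate with value 1 at unit
   offset 0 and value 7 at offset 8 for a parameter, and another caller
   passes 1 at offset 0 and 9 at offset 8, the interim vector intersects to
   just the value 1 at offset 0, and only that entry is copied to the
   returned vector.  */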
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
                                         struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
        continue;

      if (i >= ipa_get_cs_argument_count (args))
        return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
                                ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
        return false;
    }
  return true;
}
/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
                                          struct cgraph_node *node)
{
  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
  if (!ts || vec_safe_is_empty (ts->m_agg_values))
    return true;

  const ipa_argagg_value_list existing (ts->m_agg_values);
  auto_vec<ipa_argagg_value, 32> edge_values;
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  gcc_checking_assert (dest_info->ipcp_orig_node);
  dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
  push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
  const ipa_argagg_value_list avl (&edge_values);
  return avl.superset_of_p (existing);
}
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected to the special node.
   This function can therefore redirect the final edge in an SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
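
/* Note that the inner while loop does not inspect just the edge recorded in
   the value source: get_next_cgraph_edge_clone also visits all edges created
   by cloning that edge's caller, since those copies bring the value too.  */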
/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}
/* Return a copy of KNOWN_CONTEXTS if it is not empty, otherwise return
   vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (const vec<ipa_polymorphic_call_context>
			    &known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}
/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  const ipa_argagg_value_list avl (aggvals);
  tree v = avl.get_value (index, offset / BITS_PER_UNIT);
  return v && values_equal_for_ipcp_p (v, value);
}
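
/* Example of the offset conversion above: a replacement recorded for bit
   offset 64 within the aggregate is looked up at byte offset
   64 / BITS_PER_UNIT, i.e. byte 8 on targets with 8-bit units.  */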
/* Return true if offset is minus one because the source of a polymorphic
   context cannot be an aggregate value.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
			       int , HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}
/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.  SELF_GEN_CLONES
   is a vector which contains clones created for self-recursive calls with an
   arithmetic pass-through jump function.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were, it would only be guesswork which we
	 can do now.  The recursive part of the counts can be derived from the
	 count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum,
				      val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts,
				val, index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  vec<ipa_argagg_value, va_gc> *aggvals
    = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}
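
/* To recap the decision above: a specialized clone is created for a value
   only if none exists yet, the unit growth limit would not be exceeded, at
   least one edge actually brings the value, and one of the two cost/benefit
   checks (purely local, or including effects propagated to callees) deems
   the opportunity good.  */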
/* Like irange::contains_p(), but convert VAL to the range of R if
   necessary.  */

static bool
ipa_range_contains_p (const vrange &r, tree val)
{
  if (r.undefined_p ())
    return false;

  tree type = r.type ();
  if (!wi::fits_to_tree_p (wi::to_wide (val), type))
    return false;

  val = fold_convert (type, val);
  return r.contains_p (val);
}
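
/* For example, if R is the int range [0, 99] and VAL is the short constant
   200, VAL is first converted to int and then tested, yielding false.  A
   value that cannot even be represented in R's type is rejected outright by
   the wi::fits_to_tree_p check.  */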
/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec<cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      if (!ipa_is_param_used (info, i))
	continue;

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    {
	      /* If some values generated for self-recursive calls with
		 arithmetic jump functions fall outside of the known
		 range for the parameter, we can skip them.  */
	      if (TREE_CODE (val->value) == INTEGER_CST
		  && !plats->m_value_range.bottom_p ()
		  && !ipa_range_contains_p (plats->m_value_range.m_vr,
					    val->value))
		{
		  /* This can happen also if a constant present in the source
		     code falls outside of the range of parameter's type, so we
		     cannot assume the value was generated by self-recursion
		     only.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " - skipping%s value ",
			       val->self_recursion_generated_p ()
			       ? " self_recursion_generated" : "");
		      print_ipcp_constant_value (dump_file, val->value);
		      fprintf (dump_file, " because it is outside known "
			       "value range.\n");
		    }
		  continue;
		}
	      ret |= decide_about_value (node, i, -1, val, &avals,
					 &self_gen_clones);
	    }
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been considered
		   for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val, &avals,
					   &self_gen_clones);
	}

      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      vec<ipa_argagg_value, va_gc> *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
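
/* The return value feeds the iteration in ipcp_decision_stage below:
   returning true means at least one clone was created, so all members of the
   SCC are re-examined because the new node may expose further profitable
   specializations.  */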
/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = ipa_node_params_sum->get (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
	ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
	if (!caller_info /* Unoptimized callers are like dead ones.  */
	    || !caller_info->node_dead)
	  return true;
      }
  return false;
}
/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
	ipa_node_params *info = ipa_node_params_sum->get (v);
	if (info
	    && !v->call_for_symbol_thunks_and_aliases
		  (has_undead_caller_from_outside_scc_p, NULL, true))
	  info->node_dead = 1;
      }

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
	spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	if (ipa_node_params_sum->get (v)
	    && ipa_node_params_sum->get (v)->node_dead)
	  fprintf (dump_file, " Marking node as dead: %s.\n",
		   v->dump_name ());
    }
}
/* The decision stage.  Iterate over the topological order of call graph nodes
   TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);
	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}
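
/* Within each SCC, the while loop keeps re-running the decision for all
   versionable members until no new clone is created, since one clone can
   make cloning profitable for another member; only then are the nodes made
   redundant by the new clones marked as dead.  */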
/* Look up all VR and bits information that we have discovered and copy it
   over to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool dumped_sth = false;
      bool found_useful_result = false;
      bool do_vr = true;
      bool do_bits = true;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagation; -fipa-vrp: disabled.\n",
		     node->dump_name ());
	  do_vr = false;
	}
      if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp: disabled.\n",
		     node->dump_name ());
	  do_bits = false;
	}
      if (!do_bits && !do_vr)
	continue;

      if (info->ipcp_orig_node)
	info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	  if (do_bits && plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipcp_bits_lattice *bits = NULL;

	  if (do_bits
	      && plats->bits_lattice.constant_p ()
	      && dbg_cnt (ipa_cp_bits))
	    bits = &plats->bits_lattice;

	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ()
	      && dbg_cnt (ipa_cp_vr))
	    {
	      if (bits)
		{
		  Value_Range tmp = plats->m_value_range.m_vr;
		  tree type = ipa_get_type (info, i);
		  irange &r = as_a<irange> (tmp);
		  irange_bitmask bm (wide_int::from (bits->get_value (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)),
				     wide_int::from (bits->get_mask (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)));
		  r.update_bitmask (bm);
		  ipa_vr vr (tmp);
		  ts->m_vr->quick_push (vr);
		}
	      else
		{
		  ipa_vr vr (plats->m_value_range.m_vr);
		  ts->m_vr->quick_push (vr);
		}
	    }
	  else if (bits)
	    {
	      tree type = ipa_get_type (info, i);
	      Value_Range tmp;
	      tmp.set_varying (type);
	      irange &r = as_a<irange> (tmp);
	      irange_bitmask bm (wide_int::from (bits->get_value (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)),
				 wide_int::from (bits->get_mask (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)));
	      r.update_bitmask (bm);
	      ipa_vr vr (tmp);
	      ts->m_vr->quick_push (vr);
	    }
	  else
	    {
	      ipa_vr vr;
	      ts->m_vr->quick_push (vr);
	    }

	  if (!dump_file || !bits)
	    continue;

	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  print_hex (bits->get_value (), dump_file);
	  fprintf (dump_file, ", mask = ");
	  print_hex (bits->get_mask (), dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of value range and bits propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}
/* Initialization and computation of IPCP data structures.  This is the initial
   intraprocedural analysis of functions, which gathers information to be
   propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}
namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries,
		      /* write_optimization_summary */
		      ipcp_read_transformation_summaries,
		      /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    /* FIXME: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_cp && optimize) || in_lto_p;
  }

  unsigned int execute (function *) final override { return ipcp_driver (); }

}; // class pass_ipa_cp
} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}
/* Reset all state within ipa-cp.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_cc_finalize (void)
{
  base_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}
/* Given PARAM which must be a parameter of function FNDECL described by THIS,
   return its index in the DECL_ARGUMENTS chain, using a pre-computed
   DECL_UID-sorted vector if available (which is pre-computed only if there are
   many parameters).  Can return -1 if param is the static chain which is not
   represented among DECL_ARGUMENTS.  */

int
ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
{
  gcc_assert (TREE_CODE (param) == PARM_DECL);
  if (m_uid_to_idx)
    {
      unsigned puid = DECL_UID (param);
      const ipa_uid_to_idx_map_elt *res
	= std::lower_bound (m_uid_to_idx->begin (), m_uid_to_idx->end (), puid,
			    [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
			    {
			      return elt.uid < uid;
			    });
      if (res == m_uid_to_idx->end ()
	  || res->uid != puid)
	{
	  gcc_assert (DECL_STATIC_CHAIN (fndecl));
	  return -1;
	}
      return res->index;
    }

  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    if (p == param)
      return index;

  gcc_assert (DECL_STATIC_CHAIN (fndecl));
  return -1;
}
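
/* Example: for a function with parameters (a, b, c), looking up the PARM_DECL
   of b returns 1, either through the binary search in m_uid_to_idx when the
   map has been created or through the linear DECL_CHAIN walk otherwise; the
   static chain, which is not among DECL_ARGUMENTS, maps to -1.  */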
/* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
   according to the uid.  */

static int
compare_uids (const void *a, const void *b)
{
  const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
  const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
  if (e1->uid < e2->uid)
    return -1;
  if (e1->uid > e2->uid)
    return 1;
  gcc_unreachable ();
}
/* Assuming THIS describes FNDECL and it has sufficiently many parameters to
   justify the overhead, create a DECL_UID-sorted vector to speed up mapping
   from parameters to their indices in the DECL_ARGUMENTS chain.  */

void
ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
{
  int c = count_formal_params (fndecl);
  if (c < 32)
    return;

  m_uid_to_idx = NULL;
  vec_safe_reserve (m_uid_to_idx, c, true);
  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    {
      ipa_uid_to_idx_map_elt elt;
      elt.uid = DECL_UID (p);
      elt.index = index;
      m_uid_to_idx->quick_push (elt);
    }
  m_uid_to_idx->qsort (compare_uids);
}
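
/* The map trades the O(n) DECL_CHAIN walk in get_param_index for an O(log n)
   binary search, which only pays off for functions with many parameters,
   hence the early return above for small parameter counts.  */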