/* Interprocedural constant propagation
   Copyright (C) 2005-2021 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code size
      growth is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.
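
   As a minimal illustration (an invented example, not taken from these
   sources), consider

     static int
     mult (int a, int b)
     {
       return a * b;
     }

     int
     user (int x)
     {
       return mult (x, 16);
     }

   Every call to mult passes the constant 16 in B, so the pass can modify
   mult (or, when mult has other callers as well, create a specialized clone
   of it) so that B is known to be 16, which later optimizations can exploit,
   e.g. by turning the multiplication into a shift.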

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, possibly with an operation performed on it.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.
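
   For example (an illustrative case, not from the sources), if I is a formal
   parameter of the caller, a call foo (i + 1, 7, *p) yields a pass-through
   jump function with operation PLUS_EXPR and operand 1 for the first
   argument, a constant jump function holding 7 for the second, and, when
   nothing useful is known about *p, an unknown jump function for the third.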

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original Callahan's paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two chains of self-recursive value generation, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

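/* An illustrative note (not part of the original sources): a freshly
   zero-initialized ipcp_lattice, with no values and both flags clear,
   represents TOP.  add_value moves it towards a set of known constants,
   set_contains_variable adds a variable component on top of any values, and
   set_to_bottom takes it to BOTTOM, from which no other state is
   reachable.  */
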
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
      return f (y & 0xff);
     else
      return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic contexts of the Ith
   formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, "               ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

/* Print bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, "    param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, "         ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, "         ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, "        virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, "        AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, "        AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STAT to zeroes and optionally set it up so that edges
   from IGNORED_CALLER are not counted.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls ++;
      }
  return false;
}

/* Return true if this NODE is viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seems cold; constant propagation can improve function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
    constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the value range described by
   the OTHER lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with value range described by VR
   lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
                                       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet, we xor values
   if this->value, value have different values at same bit positions, we want
   to drop that bit to varying.  Return true if mask is changed.
   This function assumes that the lattice value is in CONSTANT state.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}

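/* A worked example with made-up numbers: meeting a CONSTANT lattice holding
   value 0x4, mask 0x3 (bits 0 and 1 unknown, bit 2 known to be 1) with value
   0x5, mask 0x1 gives m_mask = (0x3 | 0x1) | (0x4 ^ 0x5) = 0x3 and
   m_value = 0x4 & ~0x3 = 0x4, i.e. only bit 2 is still known.  The mask is
   unchanged, so the function returns false.  */
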
/* Meet the bits lattice with operand
   described by <value, mask, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, "  Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           "  The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
          || !ipa_get_type (info, i)
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.m_vr = value_range ();
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init ();
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

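/* For instance (an illustrative case, not from the sources), a pass-through
   jump function with opcode PLUS_EXPR and operand 4 applied to the constant
   input 3 folds to the interprocedural invariant 7, whereas applying it to
   the address of an automatic variable fails the is_gimple_ip_invariant
   check above and yields NULL_TREE.  */
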
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
                                   value_range *src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}

/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
                            ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
                                       jfunc->m_vr,
                                       NOP_EXPR, parm_type,
                                       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
        = ipcp_get_transformation_summary (cs->caller->inlined_to
                                           ? cs->caller->inlined_to
                                           : cs->caller);
      if (!sum || !sum->m_vr)
        return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
        return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
                         wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
                         (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
        {
          value_range res;

          if (ipa_vr_operation_and_type_effects (&res,
                                                 &srcvr,
                                                 operation, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
      else
        {
          value_range op_res, res;
          tree op = ipa_get_jf_pass_through_operand (jfunc);
          value_range op_vr (op, op);

          range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
          if (ipa_vr_operation_and_type_effects (&res,
                                                 &op_res,
                                                 NOP_EXPR, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
    }
  return vr;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
                     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
          && aggval->index == index)
        return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}

/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describes the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
                         struct cgraph_node *node,
                         struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
                       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
        value = info->known_csts[src_idx];
      else
        value = get_clone_agg_value (node, item->value.load_agg.offset,
                                     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
        = ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
        {
          struct ipcp_lattice<tree> *lat = &src_plats->itself;

          if (!lat->is_single_const ())
            return NULL_TREE;

          value = lat->values->value;
        }
      else if (src_plats->aggs
               && !src_plats->aggs_bottom
               && !src_plats->aggs_contain_variable
               && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
        {
          struct ipcp_agg_lattice *aglat;

          for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
            {
              if (aglat->offset > item->value.load_agg.offset)
                break;

              if (aglat->offset == item->value.load_agg.offset)
                {
                  if (aglat->is_single_const ())
                    value = aglat->values->value;
                  break;
                }
            }
        }
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
        return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
                                  value,
                                  item->value.pass_through.operand,
                                  item->type);
}

/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describes the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
                              struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
        {
          struct ipa_agg_value value_item;

          value_item.offset = item->offset;
          value_item.value = value;

          agg.items.safe_push (value_item);
        }
    }
  return agg;
}

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

static void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
          || !opt_for_fn (node->decl, optimize))
        continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
        {
          ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

          if (!lat->bottom
              && !lat->contains_variable
              && lat->values_count == 0)
            {
              if (dump_file)
                {
                  symtab->dump (dump_file);
                  fprintf (dump_file, "\nIPA lattices after constant "
                           "propagation, before gcc_unreachable:\n");
                  print_all_lattices (dump_file, true, false);
                }

              gcc_unreachable ();
            }
        }
    }
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
                         ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}

/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself or the offset within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
                                 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}

/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   CST and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = cst;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}

/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
                              unsigned same_lat_gen_level)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = ctx;
  val->self_recursion_generated_level = same_lat_gen_level;
  return val;
}

/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records address of existing or newly added
   ipcp_value.

   If the value is generated for a self-recursive call as a result of an
   arithmetic pass-through jump-function acting on a value in the same lattice,
   SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
   zero.  If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
                                  ipcp_value<valtype> *src_val,
                                  int src_idx, HOST_WIDE_INT offset,
                                  ipcp_value<valtype> **val_p,
                                  unsigned same_lat_gen_level)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
        if (val_p)
          *val_p = val;

        if (val->self_recursion_generated_level < same_lat_gen_level)
          val->self_recursion_generated_level = same_lat_gen_level;

        if (ipa_edge_within_scc (cs))
          {
            ipcp_value_source<valtype> *s;
            for (s = val->sources; s; s = s->next)
              if (s->cs == cs && s->val == src_val)
                break;
            if (s)
              return false;
          }

        val->add_source (cs, src_val, src_idx, offset);
        return false;
      }

  if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
                                                param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
         of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
        {
          while (val->sources)
            {
              ipcp_value_source<valtype> *src = val->sources;
              val->sources = src->next;
              ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
            }
        }
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to end of value list, which can reduce iterations
     of propagation stage for recursive function.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}

/* A helper function that returns result of operation specified by OPCODE on
   the value of SRC_VAL.  If non-NULL, OPND1_TYPE is expected type for the
   value of SRC_VAL.  If the operation is binary, OPND2 is a constant value
   acting as its second operand.  If non-NULL, RES_TYPE is expected type of
   the result.  */

static tree
get_val_across_arith_op (enum tree_code opcode,
                         tree opnd1_type,
                         tree opnd2,
                         ipcp_value<tree> *src_val,
                         tree res_type)
{
  tree opnd1 = src_val->value;

  /* Skip source values that are incompatible with specified type.  */
  if (opnd1_type
      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
    return NULL_TREE;

  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}

2029 /* Propagate values through an arithmetic transformation described by a jump
2030 function associated with edge CS, taking values from SRC_LAT and putting
2031 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2032 OPND2 is a constant value if transformation is a binary operation.
2033 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2034 a part of the aggregate. SRC_IDX is the index of the source parameter.
2035 RES_TYPE is the value type of result being propagated into. Return true if
2036 DEST_LAT changed. */
2039 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2040 enum tree_code opcode
,
2043 ipcp_lattice
<tree
> *src_lat
,
2044 ipcp_lattice
<tree
> *dest_lat
,
2045 HOST_WIDE_INT src_offset
,
2049 ipcp_value
<tree
> *src_val
;
2052 /* Due to circular dependencies, propagating within an SCC through arithmetic
2053 transformation would create infinite number of values. But for
2054 self-feeding recursive function, we could allow propagation in a limited
2055 count, and this can enable a simple kind of recursive function versioning.
2056 For other scenario, we would just make lattices bottom. */
2057 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2061 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2062 param_ipa_cp_max_recursive_depth
);
2063 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2064 return dest_lat
->set_contains_variable ();
2066 /* No benefit if recursive execution is in low probability. */
2067 if (cs
->sreal_frequency () * 100
2068 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2069 param_ipa_cp_min_recursive_probability
))
2070 return dest_lat
->set_contains_variable ();
2072 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  /* We do not use self-recursively generated values as propagation
	     sources; this is absolutely conservative, but avoids an explosion
	     of the lattice's value space, especially when one recursive
	     function calls another recursive one.  */
	  if (src_val->self_recursion_generated_p ())
	    {
	      ipcp_value_source<tree> *s;

	      /* If the lattice has already been propagated for the call site,
		 no need to do that again.  */
	      for (s = src_val->sources; s; s = s->next)
		if (s->cs == cs)
		  return dest_lat->set_contains_variable ();
	    }
	  else
	    val_seeds.safe_push (src_val);
	}
      gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);

      /* Recursively generate lattice values with a limited count.  */
      unsigned i;
      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
	{
	  for (int j = 1; j < max_recursive_depth; j++)
	    {
	      tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
						     src_val, res_type);
	      if (!cstval
		  || !ipacp_value_safe_for_type (res_type, cstval))
		break;

	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
					  src_offset, &src_val, j);
	      gcc_checking_assert (src_val);
	    }
	}

      ret |= dest_lat->set_contains_variable ();
    }
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
	/* We do not use self-recursively generated values as propagation
	   sources; otherwise it is easy to overflow the value space of a
	   normal lattice.  */
	if (src_val->self_recursion_generated_p ())
	  {
	    ret |= dest_lat->set_contains_variable ();
	    continue;
	  }

	tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
					       src_val, res_type);
	if (cstval
	    && ipacp_value_safe_for_type (res_type, cstval))
	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
				      src_offset);
	else
	  ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}
/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
				    ipcp_lattice<tree> *src_lat,
				    ipcp_lattice<tree> *dest_lat, int src_idx,
				    tree parm_type)
{
  return propagate_vals_across_arith_jfunc (cs,
				ipa_get_jf_pass_through_operation (jfunc),
				NULL_TREE,
				ipa_get_jf_pass_through_operand (jfunc),
				src_lat, dest_lat, -1, src_idx, parm_type);
}
/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  */

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx,
				tree param_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t && ipacp_value_safe_for_type (param_type, t))
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
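
/* A hypothetical illustration of an ancestor jump function:

     struct B { int x; };
     struct D { int pad[4]; struct B b; };
     void g (struct B *p);
     void f (struct D *d) { g (&d->b); }

   The argument of g is described as an ancestor of f's parameter D at a
   16-byte offset (assuming 4-byte ints), so a known constant pointer value
   for D yields the constant D + 16 for the parameter of g above.  */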
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (ipacp_value_safe_for_type (param_type, val))
	return dest_lat->add_value (val, cs, NULL, 0);
      else
	return dest_lat->set_contains_variable ();
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx, param_type);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use them for indirect inlining), we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}
/* Propagate polymorphic call contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX and put the values into
   DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
			ipa_jump_func *jfunc, int idx,
			ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
{
  if (dest_lat->bottom)
    return false;
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;
  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}
    }

 prop_fail:
  if (!added_sth)
    {
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate bits across jump function JFUNC that is associated with edge CS
   and update DEST_LATTICE accordingly.  */

static bool
propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
				     ipa_jump_func *jfunc,
				     ipcp_bits_lattice *dest_lattice)
{
  if (dest_lattice->bottom_p ())
    return false;

  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
  tree parm_type = ipa_get_type (callee_info, idx);

  /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
     transform for these cases.  Similarly, we can have bad type mismatches
     with LTO, avoid doing anything with those too.  */
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);
  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example, consider:

	   int f (int x)
	   {
	     g (x & 0xff);
	   }

	 Assume the lattice for x is bottom; we can still propagate the
	 result of x & 0xff, which gets computed during the ccp1 pass and
	 which we store in the jump function during the analysis stage.  */

      if (src_lats->bits_lattice.bottom_p ()
	  && jfunc->bits)
	return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
					precision);
      else
	return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
					code, operand);
    }
  else if (jfunc->type == IPA_JF_ANCESTOR)
    return dest_lattice->set_to_bottom ();
  else if (jfunc->bits)
    return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
				    precision);
  else
    return dest_lattice->set_to_bottom ();
}
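
/* A minimal self-contained sketch (not GCC code) of the meet such a tri-state
   bits lattice performs, where a set mask bit means "this bit is unknown":

     struct known_bits { unsigned HOST_WIDE_INT value, mask; };

     static struct known_bits
     known_bits_meet (struct known_bits a, struct known_bits b)
     {
       struct known_bits r;
       r.mask = a.mask | b.mask | (a.value ^ b.value);
       r.value = a.value & ~r.mask;
       return r;
     }

   A bit stays known only if it is known on both sides and the two known
   values agree on it: for fully known inputs 0x10 and 0x12 (both masks 0),
   the result is value 0x10 with mask 0x02, i.e. only bit 1 becomes
   unknown.  */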
/* Propagate the value range across jump function JFUNC that is associated
   with edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
   accordingly.  */

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ())
	return dest_lat->set_to_bottom ();

      value_range vr;
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	ipa_vr_operation_and_type_effects (&vr,
					   &src_lats->m_value_range.m_vr,
					   operation, param_type,
					   operand_type);
      /* A crude way to prevent an unbounded number of value range updates
	 in SCC components.  We should allow a limited number of updates
	 within them.  */
      else if (!ipa_edge_within_scc (cs))
	{
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  value_range op_vr (op, op);
	  value_range op_res, res;

	  range_fold_binary_expr (&op_res, operation, operand_type,
				  &src_lats->m_value_range.m_vr, &op_vr);
	  ipa_vr_operation_and_type_effects (&vr,
					     &op_res,
					     NOP_EXPR, param_type,
					     operand_type);
	}
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  if (jfunc->m_vr)
	    {
	      value_range jvr;
	      if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
						     NOP_EXPR,
						     param_type,
						     jfunc->m_vr->type ()))
		vr.intersect (jvr);
	    }
	  return dest_lat->meet_with (&vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  value_range tmpvr (val, val);
	  return dest_lat->meet_with (&tmpvr);
	}
    }

  value_range vr;
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (&vr);
  else
    return dest_lat->set_to_bottom ();
}
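
/* A hypothetical worked example of the above: for a call g (x + 1) where the
   caller's lattice gives x the range [0, 10] and the jump function records
   PLUS_EXPR with operand 1, folding the binary expression yields [1, 11],
   which is then converted to the type of g's parameter and intersected with
   any range recorded directly in the jump function before being met with
   the destination lattice.  */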
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}
/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable set to PRE_EXISTING, and return
   true, unless there are too many already.  If there are too many, return
   false.  If there are overlaps, turn the whole DEST_PLATS to bottom and
   return false.  If any skipped lattices were newly marked as containing
   variable, set *CHANGE to true.  MAX_AGG_ITEMS is the maximum number of
   lattices.  */
static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
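
/* A hypothetical picture of the invariant the walk above maintains: the
   lattices hang off dest_plats->aggs sorted by offset and never overlap,
   e.g. for a struct with an int at byte 0 and a long at byte 8 (in bits):

     aggs -> { offset 0, size 32 } -> { offset 64, size 64 } -> NULL

   Requesting offset 64 with size 64 finds the existing node, while
   requesting offset 32 with size 64 would overlap the second node and
   therefore drops the whole parameter's aggregate state to bottom.  */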
/* Set all AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}
/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS
   changed.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }

  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
/* Determine whether there is anything to propagate FROM SRC_PLATS through a
   pass-through JFUNC and, if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}
/* Propagate values through ITEM, jump function for a part of an aggregate,
   into the corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */

static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = ipa_node_params_sum->get (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
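
/* A hypothetical example of the IPA_JF_LOAD_AGG case handled above:

     void f (struct S *p) { g (p->cnt + 1); }

   The argument of g is described as "load the member at the offset of cnt
   within the aggregate pointed to by parameter 0, then add 1", so constants
   that f's callers are known to store into *p can flow into g through the
   arithmetic propagation invoked above.  */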
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	}
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
/* Return true if, on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk;
}
/* Propagate constants from the caller to the callee of CS.  INFO describes the
   caller.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = ipa_node_params_sum->get (callee);
  if (!callee_info)
    return false;

  args = ipa_edge_args_sum->get (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination.  The latter
   three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				const vec<tree> &known_csts,
				const vec<ipa_polymorphic_call_context>
				&known_contexts,
				const vec<ipa_agg_value_set> &known_aggs,
				struct ipa_agg_replacement_value *agg_reps,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  if (param_index == -1)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      tree t = NULL;

      if (ie->indirect_info->agg_contents)
	{
	  if (agg_reps && ie->indirect_info->guaranteed_unmodified)
	    {
	      while (agg_reps)
		{
		  if (agg_reps->index == param_index
		      && agg_reps->offset == ie->indirect_info->offset
		      && agg_reps->by_ref == ie->indirect_info->by_ref)
		    {
		      t = agg_reps->value;
		      break;
		    }
		  agg_reps = agg_reps->next;
		}
	    }
	  if (!t)
	    {
	      const ipa_agg_value_set *agg;
	      if (known_aggs.length () > (unsigned int) param_index)
		agg = &known_aggs[param_index];
	      else
		agg = NULL;
	      bool from_global_constant;
	      t = ipa_find_agg_cst_for_param (agg,
					      (unsigned) param_index
					      < known_csts.length ()
					      ? known_csts[param_index]
					      : NULL,
					      ie->indirect_info->offset,
					      ie->indirect_info->by_ref,
					      &from_global_constant);
	      if (t
		  && !from_global_constant
		  && !ie->indirect_info->guaranteed_unmodified)
		t = NULL_TREE;
	    }
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }
  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  anc_offset = ie->indirect_info->offset;

  /* Try to work out the value of the virtual table pointer in
     replacements.  */
  if (!t && agg_reps && !ie->indirect_info->by_ref)
    {
      while (agg_reps)
	{
	  if (agg_reps->index == param_index
	      && agg_reps->offset == ie->indirect_info->offset
	      && agg_reps->by_ref)
	    {
	      t = agg_reps->value;
	      break;
	    }
	  agg_reps = agg_reps->next;
	}
    }

  /* Try to work out the value of the virtual table pointer in known
     aggregate values.  */
  if (!t && known_aggs.length () > (unsigned int) param_index
      && !ie->indirect_info->by_ref)
    {
      const ipa_agg_value_set *agg = &known_aggs[param_index];
      t = ipa_find_agg_cst_for_param (agg,
				      (unsigned) param_index
				      < known_csts.length ()
				      ? known_csts[param_index] : NULL,
				      ie->indirect_info->offset, true);
    }
  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						      vtable, offset,
						      &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of the pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;
  vec <cgraph_node *> targets;
  bool final;

  targets = possible_polymorphic_call_targets
	      (ie->indirect_info->otr_type,
	       ie->indirect_info->otr_token,
	       context, &final);
  if (!final || targets.length () > 1)
    {
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
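
/* A hypothetical example of the devirtualization performed above:

     struct A { virtual int foo (); };
     int call (A *a) { return a->foo (); }

   If propagation determines that the argument of a specialized clone of
   call always points to an object constructed exactly as type A, the vtable
   lookup resolves the indirect edge to A::foo and the call can be made
   direct; when the virtual table pointer may still change, *SPECULATIVE is
   set and the edge is only turned into a speculative direct call.  */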
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean
   determining whether the discovered target is only a speculative guess.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}

/* The same functionality as above overloaded for ipa_auto_call_arg_values.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_auto_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}
/* Calculate the devirtualization time bonus for NODE, assuming we know
   information about arguments stored in AVALS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      target = ipa_get_indirect_edge_target (ie, avals, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}
/* Return the time bonus incurred because of hints stored in ESTIMATES.  */

static int
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
  int result = 0;
  ipa_hints hints = estimates.hints;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  if (hints & INLINE_HINT_loop_iterations)
    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();

  if (hints & INLINE_HINT_loop_stride)
    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();

  return result;
}
/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline sreal
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       sreal evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
      / 100;

  return evaluation;
}
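
/* For example (hypothetical numbers): with an evaluation of 600, a recursion
   penalty of 40 and a single-call penalty of 80, a node inside a non-self
   SCC that also calls a single call site ends up with

     600 * (100 - 40) / 100 = 360, then 360 * (100 - 80) / 100 = 72,

   which the caller then compares against param_ipa_cp_eval_threshold.  */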
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQ_SUM.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
			    sreal freq_sum, profile_count count_sum,
			    int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (count_sum > profile_count::zero ())
    {
      gcc_assert (base_count > profile_count::zero ());
      sreal factor = count_sum.probability_in (base_count).to_sreal ();
      sreal evaluation = (time_benefit * factor) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		   "size: %i, count_sum: ", time_benefit.to_double (),
		   size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
		   info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation.to_double (), eval_threshold);
	}

      return evaluation.to_int () >= eval_threshold;
    }
  else
    {
      sreal evaluation = (time_benefit * freq_sum) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
		 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
		 "threshold: %i\n",
		 time_benefit.to_double (), size_cost, freq_sum.to_double (),
		 info->node_within_scc
		 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation.to_double (), eval_threshold);

      return evaluation.to_int () >= eval_threshold;
    }
}
/* Return all context independent values from aggregate lattices in PLATS in a
   vector.  Return NULL if there are none.  */

static vec<ipa_agg_value>
context_independent_aggregate_values (class ipcp_param_lattices *plats)
{
  vec<ipa_agg_value> res = vNULL;

  if (plats->aggs_bottom
      || plats->aggs_contain_variable
      || plats->aggs_count == 0)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs;
       aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value item;
	item.offset = aglat->offset;
	item.value = aglat->values->value;
	res.safe_push (item);
      }
  return res;
}
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
				   ipa_auto_call_arg_values *avals,
				   bool calculate_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);
  if (calculate_aggs)
    avals->m_known_aggs.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals->m_known_vals[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account a known context as a reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
	{
	  vec<ipa_agg_value> agg_items;
	  struct ipa_agg_value_set *agg;

	  agg_items = context_independent_aggregate_values (plats);
	  agg = &avals->m_known_aggs[i];
	  agg->items = agg_items;
	  agg->by_ref = plats->aggs_by_ref;
	  ret |= !agg_items.is_empty ();
	}
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement cost of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
			       ipa_auto_call_arg_values *avals,
			       int removable_params_cost, int est_move_cost,
			       ipcp_value_base *val)
{
  sreal time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = (estimates.nonspecialized_time - estimates.time)
      + (devirtualization_time_bonus (node, avals)
	 + hint_time_bonus (node, estimates)
	 + removable_params_cost + est_move_cost);

  int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
/* Get the overall limit of growth based on parameters extracted from growth.
   It does not really make sense to mix functions with different overall growth
   limits, but it is possible, and if it happens we do not want to select one
   limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}
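
/* A worked example with hypothetical numbers: if the unit originally had
   20000 instructions, param_ipa_cp_large_unit_insns is 16000 and
   param_ipa_cp_unit_growth is 10, the returned limit is

     20000 + 20000 * 10 / 100 + 1 = 22001.

   In a unit smaller than the large-unit threshold the threshold itself is
   used as the base instead, so tiny units always get some room to
   specialize.  */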
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  if (always_const || devirt_bonus
      || (removable_params_cost && node->can_change_signature))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, "     Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
					   stats.count_sum, size))
	{
	  if (size + overall_size <= get_max_overall_size (node))
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, "     Decided to specialize for all "
			 "known contexts, growth (to %li) deemed "
			 "beneficial.\n", overall_size);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Not cloning for all contexts because "
		     "maximum unit size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");
    }
  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_vals[i] = NULL_TREE;
    }
  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %g, size: %i\n",
		       val->local_time_benefit.to_double (),
		       val->local_size_cost);
	    }
	}
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }
  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      ipa_agg_value_set *agg = &avals.m_known_aggs[i];
      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is in known_aggs.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  for (val = aglat->values; val; val = val->next)
	    {
	      struct ipa_agg_value item;

	      item.offset = aglat->offset;
	      item.value = val->value;
	      agg->items.safe_push (item);

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, i);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %g, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit.to_double (),
			   val->local_size_cost);
		}

	      agg->items.pop ();
	    }
	}
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;
	  v->scc_no = cur_val->dfs;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
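
/* The above is Tarjan's strongly connected components algorithm applied to
   the graph in which an edge leads from a value to each value it was derived
   from: dfs is the preorder number, low_link the smallest preorder number
   reachable through the sources, and a value whose dfs equals its low_link
   is the root of an SCC, whose members are then popped off the shared stack
   and linked together through scc_next.  */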
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO); iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    if (opt_for_fn (v->decl, flag_ipa_cp)
		&& opt_for_fn (v->decl, optimize))
	      push_node_to_stack (topo, v);
	    /* When V is not optimized, we cannot push it to the stack, but
	       we still need to set all its callees' lattices to bottom.  */
	    else
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		propagate_constants_across_call (cs);
	  }

      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;
	  class ipa_node_params *info = NULL;
	  bool self_scc = true;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		cgraph_node *callee = cs->callee->function_symbol ();

		if (v != callee)
		  self_scc = false;

		if (!info)
		  {
		    info = ipa_node_params_sum->get (v);
		    info->node_within_scc = true;
		  }

		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, callee);
	      }

	  if (info)
	    info->node_is_self_scc = self_scc;

	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ()
	    && opt_for_fn (v->decl, flag_ipa_cp)
	    && opt_for_fn (v->decl, optimize))
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}
/* Propagate the estimated effects of individual values along the topological
   order, from the dependent values to those they depend on.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      for (val = base; val; val = val->scc_next)
	{
	  time = time + val->local_time_benefit + val->prop_time_benefit;
	  size = size + val->local_size_cost + val->prop_size_cost;
	}

      for (val = base; val; val = val->scc_next)
	{
	  processed_srcvals.empty ();
	  for (src = val->sources; src; src = src->next)
	    if (src->val
		&& src->cs->maybe_hot_p ())
	      {
		if (!processed_srcvals.add (src->val))
		  {
		    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
		    if (prop_size < INT_MAX)
		      src->val->prop_size_cost = prop_size;
		    else
		      continue;
		  }

		int special_factor = 1;
		if (val->same_scc (src->val))
		  special_factor
		    = opt_for_fn (src->cs->caller->decl,
				  param_ipa_cp_recursive_freq_factor);
		else if (val->self_recursion_generated_p ()
			 && (src->cs->callee->function_symbol ()
			     == src->cs->caller))
		  {
		    int max_recur_gen_depth
		      = opt_for_fn (src->cs->caller->decl,
				    param_ipa_cp_max_recursive_depth);
		    special_factor = max_recur_gen_depth
		      - val->self_recursion_generated_level + 1;
		  }

		src->val->prop_time_benefit
		  += time * special_factor * src->cs->sreal_frequency ();
	      }

	  if (size < INT_MAX)
	    {
	      val->prop_time_benefit = time;
	      val->prop_size_cost = size;
	    }
	  else
	    {
	      val->prop_time_benefit = 0;
	      val->prop_size_cost = 0;
	    }
	}
    }
}
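
/* For instance (hypothetical numbers): if an SCC of values has a summed time
   benefit of 50 and one of its member values was derived from a source value
   through an edge with frequency 2.0 while a recursive frequency factor of 6
   applies, the source value's prop_time_benefit grows by 50 * 6 * 2.0 = 600,
   so estimated benefits accumulate toward the values whose materialization
   enables the dependent ones.  */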
/* Callback for qsort to sort counts of all edges.  */

static int
compare_edge_profile_counts (const void *a, const void *b)
{
  const profile_count *cnt1 = (const profile_count *) a;
  const profile_count *cnt2 = (const profile_count *) b;

  if (*cnt1 < *cnt2)
    return 1;
  if (*cnt1 > *cnt2)
    return -1;
  return 0;
}
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  base_count = profile_count::uninitialized ();

  bool compute_count_base = false;
  unsigned base_count_pos_percent = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      if (node->has_gimple_body_p ()
	  && opt_for_fn (node->decl, flag_ipa_cp)
	  && opt_for_fn (node->decl, optimize))
	{
	  ipa_node_params *info = ipa_node_params_sum->get (node);
	  determine_versionability (node, info);

	  unsigned nlattices = ipa_get_param_count (info);
	  void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
	  info->lattices = new (chunk) ipcp_param_lattices[nlattices];
	  initialize_node_lattices (node);
	}
      ipa_size_summary *s = ipa_size_summaries->get (node);
      if (node->definition && !node->alias && s != NULL)
	overall_size += s->self_size;
      if (node->count.ipa ().initialized_p ())
	{
	  compute_count_base = true;
	  unsigned pos_percent = opt_for_fn (node->decl,
					     param_ipa_cp_profile_count_base);
	  base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
	}
    }

  if (compute_count_base)
    {
      auto_vec<profile_count> all_edge_counts;
      all_edge_counts.reserve_exact (symtab->edges_count);
      FOR_EACH_DEFINED_FUNCTION (node)
	for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	  {
	    profile_count count = cs->count.ipa ();
	    if (!(count > profile_count::zero ()))
	      continue;

	    enum availability avail;
	    cgraph_node *tgt
	      = cs->callee->function_or_virtual_thunk_symbol (&avail);
	    ipa_node_params *info = ipa_node_params_sum->get (tgt);
	    if (info && info->versionable)
	      all_edge_counts.quick_push (count);
	  }

      if (!all_edge_counts.is_empty ())
	{
	  gcc_assert (base_count_pos_percent <= 100);
	  all_edge_counts.qsort (compare_edge_profile_counts);

	  unsigned base_count_pos
	    = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
	  base_count = all_edge_counts[base_count_pos];

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nSelected base_count from %u edges at "
		       "position %u, arriving at: ", all_edge_counts.length (),
		       base_count_pos);
	      base_count.dump (dump_file);
	      fprintf (dump_file, "\n");
	    }
	}
      else if (dump_file)
	fprintf (dump_file, "\nNo candidates with non-zero call count found, "
		 "continuing as if without profile feedback.\n");
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				struct ipa_agg_replacement_value *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      next_ie = ie->next_callee;
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       vNULL, aggvals, &speculative);
      if (target)
	{
	  bool agg_contents = ie->indirect_info->agg_contents;
	  bool polymorphic = ie->indirect_info->polymorphic;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  if (cs && !agg_contents && !polymorphic)
	    {
	      ipa_node_params *info = ipa_node_params_sum->get (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE
		  && !ipa_get_param_load_dereferenced (info, param_index))
		{
		  struct ipa_ref *to_del;

		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "     controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "       and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  */

class edge_clone_summary
{
public:
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    m_initialize_when_cloning = true;
  }

  virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
			  edge_clone_summary *src_data,
			  edge_clone_summary *dst_data);
};

/* Edge duplication hook.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
				 edge_clone_summary *src_data,
				 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}
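/* For illustration only: if an edge E1 already has clones E2 and E3 chained
   as E1 -> E2 -> E3 via the next_clone links and E2 is duplicated into a new
   edge E2', the duplication hook above splices E2' in right after its source,
   yielding E1 -> E2 -> E2' -> E3, with the prev_clone links updated to
   match.  The destructor unlinks an edge from the chain symmetrically.  */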
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */

static bool
calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
					     bool allow_recursion_to_clone)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);

  if (availability <= AVAIL_INTERPOSABLE)
    return false;
  if (callee == dest)
    return true;
  if (!allow_recursion_to_clone && cs->caller == callee)
    return false;

  ipa_node_params *info = ipa_node_params_sum->get (callee);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      tree t;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else
	t = get_clone_agg_value (cs->caller, src->offset, src->index);
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
	return true;

      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}
/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
			    ipcp_value_source<ipa_polymorphic_call_context> *src,
			    cgraph_node *dest,
			    ipcp_value<ipa_polymorphic_call_context> *)
{
  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
				  caller_info->known_contexts[src->index]);

  class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
							    src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
				plats->ctxlat.values->value);
}
/* Get the next clone in the linked list of clones of an edge.  */

static inline struct cgraph_edge *
get_next_cgraph_edge_clone (struct cgraph_edge *cs)
{
  edge_clone_summary *s = edge_clone_summaries->get (cs);
  return s != NULL ? s->next_clone : NULL;
}
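/* For illustration only: walking every clone of an edge CS with the accessor
   above follows the pattern used by the functions below,

     for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
       process (e);

   where process stands in for whatever per-edge work is needed.  */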
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequency and their number and cumulative profile
   counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
   *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */

template <typename valtype>
static bool
get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
				sreal *freq_sum, int *caller_count,
				profile_count *rec_count_sum,
				profile_count *nonrec_count_sum)
{
  ipcp_value_source<valtype> *src;
  sreal freq = 0;
  int count = 0;
  profile_count rec_cnt = profile_count::zero ();
  profile_count nonrec_cnt = profile_count::zero ();
  bool hot = false;
  bool non_self_recursive = false;

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    {
	      count++;
	      freq += cs->sreal_frequency ();
	      hot |= cs->maybe_hot_p ();
	      if (cs->caller != dest)
		{
		  non_self_recursive = true;
		  if (cs->count.ipa ().initialized_p ())
		    nonrec_cnt += cs->count.ipa ();
		}
	      else if (cs->count.ipa ().initialized_p ())
		rec_cnt += cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  /* If the only edges bringing a value are self-recursive ones, do not bother
     evaluating it.  */
  if (!non_self_recursive)
    return false;

  *freq_sum = freq;
  *caller_count = count;
  *rec_count_sum = rec_cnt;
  *nonrec_count_sum = nonrec_cnt;

  if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
    {
      struct cgraph_edge *cs;

      /* Cold non-SCC source edge could trigger hot recursive execution of
	 function.  Consider the case as hot and rely on following cost model
	 computation to further select right one.  */
      for (cs = dest->callers; cs; cs = cs->next_caller)
	if (cs->caller == dest && cs->maybe_hot_p ())
	  return true;
    }

  return hot;
}
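/* For illustration only (hypothetical numbers): if VAL arrives through three
   viable edges, two from other functions with IPA counts 80 and 20 and one
   self-recursive edge (caller == dest) with count 900, the function above
   returns *caller_count == 3, *nonrec_count_sum == 100 and
   *rec_count_sum == 900.  */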
/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
   to let a non-self-recursive caller be the first element.  Thus, we can
   simplify intersecting operations on values that arrive from all of these
   callers, especially when there exists self-recursive call.  Return true if
   this kind of adjustment is possible.  */

static bool
adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
				       cgraph_node *node)
{
  for (unsigned i = 0; i < callers.length (); i++)
    {
      cgraph_edge *cs = callers[i];

      if (cs->caller != node)
	{
	  if (i > 0)
	    {
	      callers[i] = callers[0];
	      callers[0] = cs;
	    }
	  return true;
	}
    }
  return false;
}
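/* For illustration: given callers {rec, rec, other}, where the "rec" edges
   are self-recursive edges of NODE and "other" is not, the function above
   swaps elements 0 and 2 so that the intersections done by the functions
   below start from values known on a non-self-recursive edge:
   {other, rec, rec}.  */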
/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
   is assumed their number is known and equal to CALLER_COUNT.  */

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
			int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    ret.quick_push (cs);
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (caller_count > 1)
    adjust_callers_for_value_intersection (ret, dest);

  return ret;
}
/* Construct a replacement map for a known VALUE for a formal parameter PARAM.
   Return it or NULL if for some reason it cannot be created.  FORCE_LOAD_REF
   should be set to true when the reference created for the constant should be
   a load one and not an address one because the corresponding parameter p is
   only used as *p.  */

static struct ipa_replace_map *
get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
		     bool force_load_ref)
{
  struct ipa_replace_map *replace_map;

  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, "    replacing ");
      ipa_dump_param (dump_file, info, parm_num);

      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);

      if (force_load_ref)
	fprintf (dump_file, " - forcing load reference\n");
      else
	fprintf (dump_file, "\n");
    }
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  replace_map->force_load_ref = force_load_ref;
  return replace_map;
}
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
   one, otherwise it will be referred to as the original node.  */

static void
dump_profile_updates (cgraph_node *node, bool spec)
{
  if (spec)
    fprintf (dump_file, "     setting count of the specialized node %s to ",
	     node->dump_name ());
  else
    fprintf (dump_file, "     setting count of the original node %s to ",
	     node->dump_name ());

  node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "       edge to %s has count ",
	       cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}
/* With partial train run we do not want to assume that original's count is
   zero whenever we redirect all executed edges to clone.  Simply drop profile
   to local one in this case.  In any case, return the new value.  ORIG_NODE
   is the original node and its count has not been updated yet.  */

static profile_count
lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
{
  if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
    remainder = remainder.guessed_local ();

  return remainder;
}
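/* An illustrative reading of the function above, not an additional guarantee:
   if a partially trained original node had a nonzero IPA count and all of it
   has been redirected to the clone, the zero remainder is demoted to a
   guessed-local zero rather than kept as an IPA-wide zero, so later local
   heuristics may still estimate the original's frequency.  */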
/* Structure to sum counts coming from nodes other than the original node and
   its clones.  */

struct gather_other_count_struct
{
  cgraph_node *orig;
  profile_count other_count;
};

/* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
   counts that come from non-self-recursive calls.  */

static bool
gather_count_of_non_rec_edges (cgraph_node *node, void *data)
{
  gather_other_count_struct *desc = (gather_other_count_struct *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
      desc->other_count += cs->count.ipa ();
  return false;
}
/* Structure to help analyze if we need to boost counts of some clones of some
   non-recursive edges to match the new callee count.  */

struct desc_incoming_count_struct
{
  cgraph_node *orig;
  hash_set <cgraph_edge *> *processed_edges;
  profile_count count;
  unsigned unproc_orig_rec_edges;
};

/* Go over edges calling NODE and its thunks and gather information about
   incoming counts so that we know if we need to make any adjustments.  */

static void
analyze_clone_icoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	analyze_clone_icoming_counts (cs->caller, desc);
	continue;
      }
    else
      {
	if (cs->count.initialized_p ())
	  desc->count += cs->count.ipa ();
	if (!desc->processed_edges->contains (cs)
	    && cs->caller->clone_of == desc->orig)
	  desc->unproc_orig_rec_edges++;
      }
}
/* If caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because it is coming from the "seed" clone
   for the first value and so has been excessively scaled back as if it was not
   a recursive call, adjust it so that the incoming counts of NODE match its
   count.  NODE is the node or its thunk.  */

static void
adjust_clone_incoming_counts (cgraph_node *node,
			      desc_incoming_count_struct *desc)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk)
      {
	adjust_clone_incoming_counts (cs->caller, desc);
	profile_count sum = profile_count::zero ();
	for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
	  if (e->count.initialized_p ())
	    sum += e->count.ipa ();
	cs->count = cs->count.combine_with_ipa_count (sum);
      }
    else if (!desc->processed_edges->contains (cs)
	     && cs->caller->clone_of == desc->orig)
      {
	cs->count += desc->count;
	if (dump_file)
	  {
	    fprintf (dump_file, "       Adjusted count of an incoming edge of "
		     "a clone %s -> %s to ", cs->caller->dump_name (),
		     cs->callee->dump_name ());
	    cs->count.dump (dump_file);
	    fprintf (dump_file, "\n");
	  }
      }
}
/* When ORIG_NODE has been cloned for values which have been generated for a
   self-recursive call as a result of an arithmetic pass-through
   jump-function, adjust its count together with counts of all such clones in
   SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.

   The function sums the counts of the original node and all its clones that
   cannot be attributed to a specific clone because it comes from a
   non-recursive edge.  This sum is then evenly divided between the clones and
   on top of that each one gets all the counts which can be attributed directly
   to it.  */

static void
update_counts_for_self_gen_clones (cgraph_node *orig_node,
				   const vec<cgraph_node *> &self_gen_clones)
{
  profile_count redist_sum = orig_node->count.ipa ();
  if (!(redist_sum > profile_count::zero ()))
    return;

  if (dump_file)
    fprintf (dump_file, "     Updating profile of self recursive clone "
	     "series\n");

  gather_other_count_struct gocs;
  gocs.orig = orig_node;
  gocs.other_count = profile_count::zero ();

  auto_vec <profile_count, 8> other_edges_count;
  for (cgraph_node *n : self_gen_clones)
    {
      gocs.other_count = profile_count::zero ();
      n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
					     &gocs, false);
      other_edges_count.safe_push (gocs.other_count);
      redist_sum -= gocs.other_count;
    }

  hash_set<cgraph_edge *> processed_edges;
  unsigned i = 0;
  for (cgraph_node *n : self_gen_clones)
    {
      profile_count orig_count = n->count;
      profile_count new_count
	= (redist_sum.apply_scale (1, self_gen_clones.length ())
	   + other_edges_count[i]);
      new_count = lenient_count_portion_handling (new_count, orig_node);
      n->count = new_count;
      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
      for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
	{
	  cs->count = cs->count.apply_scale (new_count, orig_count);
	  processed_edges.add (cs);
	}
      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
	cs->count = cs->count.apply_scale (new_count, orig_count);

      i++;
    }

  /* There are still going to be edges to ORIG_NODE that have one or more
     clones coming from another node clone in SELF_GEN_CLONES and which we
     scaled by the same amount, which means that the total incoming sum of
     counts to ORIG_NODE will be too high, scale such edges back.  */
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      if (cs->callee->ultimate_alias_target () == orig_node)
	{
	  unsigned den = 0;
	  for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	    if (e->callee->ultimate_alias_target () == orig_node
		&& processed_edges.contains (e))
	      den++;
	  if (den > 0)
	    for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
	      if (e->callee->ultimate_alias_target () == orig_node
		  && processed_edges.contains (e))
		e->count = e->count.apply_scale (1, den);
	}
    }

  /* Edges from the seeds of the values generated for arithmetic jump-functions
     along self-recursive edges are likely to have fairly low count and so
     edges from them to nodes in the self_gen_clones do not correspond to the
     artificially distributed count of the nodes, the total sum of incoming
     edges to some clones might be too low.  Detect this situation and correct
     it.  */
  for (cgraph_node *n : self_gen_clones)
    {
      if (!(n->count.ipa () > profile_count::zero ()))
	continue;

      desc_incoming_count_struct desc;
      desc.orig = orig_node;
      desc.processed_edges = &processed_edges;
      desc.count = profile_count::zero ();
      desc.unproc_orig_rec_edges = 0;
      analyze_clone_icoming_counts (n, &desc);

      if (n->count.differs_from_p (desc.count))
	{
	  if (n->count > desc.count
	      && desc.unproc_orig_rec_edges > 0)
	    {
	      desc.count = n->count - desc.count;
	      desc.count
		= desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
	      adjust_clone_incoming_counts (n, &desc);
	    }
	  else if (dump_file)
	    fprintf (dump_file,
		     "       Unable to fix up incoming counts for %s.\n",
		     n->dump_name ());
	}
    }

  if (dump_file)
    for (cgraph_node *n : self_gen_clones)
      dump_profile_updates (n, n != orig_node);
  return;
}
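/* A hypothetical example of the redistribution above: assume ORIG_NODE has an
   IPA count of 1200 and SELF_GEN_CLONES contains ORIG_NODE plus two clones,
   with counts of 0, 100 and 200 attributable to the three nodes through
   non-recursive edges.  Then redist_sum becomes 1200 - 300 = 900, every node
   receives an even share 900 / 3 == 300, and each also keeps its directly
   attributable part, giving final counts of 300, 400 and 500.  */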
/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  This function should not be used
   for clones generated for arithmetic pass-through jump functions on a
   self-recursive call graph edge, that situation is handled by
   update_counts_for_self_gen_clones.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct caller_statistics stats;
  profile_count new_sum;
  profile_count remainder, orig_node_count = orig_node->count.ipa ();

  if (!(orig_node_count > profile_count::zero ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "     Updating profile from original count: ");
      orig_node_count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  init_caller_stats (&stats, new_node);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
  new_sum = stats.count_sum;

  if (new_sum > orig_node_count)
    {
      /* TODO: Perhaps this should be gcc_unreachable ()?  */
      remainder = profile_count::zero ().guessed_local ();
    }
  else if (stats.rec_count_sum.nonzero_p ())
    {
      int new_nonrec_calls = stats.n_nonrec_calls;
      /* There are self-recursive edges which are likely to bring in the
	 majority of calls but which we must divide in between the original and
	 new node.  */
      init_caller_stats (&stats, orig_node);
      orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
						     &stats, false);
      int orig_nonrec_calls = stats.n_nonrec_calls;
      profile_count orig_nonrec_call_count = stats.count_sum;

      if (orig_node->local)
	{
	  if (!orig_nonrec_call_count.nonzero_p ())
	    {
	      if (dump_file)
		fprintf (dump_file, "       The original is local and the only "
			 "incoming edges from non-dead callers with nonzero "
			 "counts are self-recursive, assuming it is cold.\n");
	      /* The NEW_NODE count and counts of all its outgoing edges
		 are still unmodified copies of ORIG_NODE's.  Just clear
		 the latter and bail out.  */
	      profile_count zero;
	      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
		zero = profile_count::zero ().guessed_local ();
	      else
		zero = profile_count::adjusted_zero ();
	      orig_node->count = zero;
	      for (cgraph_edge *cs = orig_node->callees;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;
	      for (cgraph_edge *cs = orig_node->indirect_calls;
		   cs;
		   cs = cs->next_callee)
		cs->count = zero;
	      return;
	    }
	}
      else
	{
	  /* Let's behave as if there was another caller that accounts for all
	     the calls that were either indirect or from other compilation
	     units.  */
	  orig_nonrec_calls++;
	  profile_count pretend_caller_count
	    = (orig_node_count - new_sum - orig_nonrec_call_count
	       - stats.rec_count_sum);
	  orig_nonrec_call_count += pretend_caller_count;
	}

      /* Divide all "unexplained" counts roughly proportionally to sums of
	 counts of non-recursive calls.

	 We put rather arbitrary limits on how many counts we claim because the
	 number of non-self-recursive incoming count is only a rough guideline
	 and there are cases (such as mcf) where using it blindly just takes
	 too many.  And if lattices are considered in the opposite order we
	 could also take too few.  */
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
      profile_count new_part
	= MAX (MIN (unexp.apply_scale (new_sum,
				       new_sum + orig_nonrec_call_count),
		    unexp.apply_scale (limit_den - 1, limit_den)),
	       unexp.apply_scale (new_nonrec_calls, limit_den));

      if (dump_file)
	{
	  fprintf (dump_file, "       Claiming ");
	  new_part.dump (dump_file);
	  fprintf (dump_file, " of unexplained ");
	  unexp.dump (dump_file);
	  fprintf (dump_file, " counts because of self-recursive "
		   "calls\n");
	}

      new_sum += new_part;
      remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						  orig_node);
    }
  else
    remainder = lenient_count_portion_handling (orig_node_count - new_sum,
						orig_node);

  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  new_node->count = new_sum;
  orig_node->count = remainder;

  profile_count orig_new_node_count = orig_node_count;
  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);

  profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
  for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);
  for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
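/* A hypothetical example of the "unexplained" count division above: let
   orig_node_count == 1000, new_sum == 100, orig_nonrec_call_count == 100 and
   one non-recursive call to each of the two nodes, so limit_den == 4.  Then
   unexp == 800 and new_part is
   MAX (MIN (800 * 100/200, 800 * 3/4), 800 * 1/4) == 400, i.e. the new node
   claims 400 of the unexplained counts and the rest stays with the
   original.  */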
/* Update the respective profile of specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
			    struct cgraph_node *orig_node,
			    profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count;

  if (dump_file)
    {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  gcc_assert (orig_node_count >= redirected_sum);

  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count -= redirected_sum;

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
						 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    {
      dump_profile_updates (new_node, true);
      dump_profile_updates (orig_node, false);
    }
}
static void adjust_references_in_caller (cgraph_edge *cs,
					 symtab_node *symbol, int index);

/* Simple structure to pass a symbol and index (with same meaning as parameters
   of adjust_references_in_caller) through a void* parameter of a
   call_for_symbol_thunks_and_aliases callback.  */
struct symbol_and_index_together
{
  symtab_node *symbol;
  int index;
};

/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
   adjust_references_in_caller on edges up in the call-graph, if necessary.  */
static bool
adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
{
  symbol_and_index_together *pack = (symbol_and_index_together *) data;
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      adjust_references_in_caller (cs, pack->symbol, pack->index);
  return false;
}

/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
   variable which is only dereferenced and which is represented by SYMBOL.  See
   if we can remove ADDR reference in callers associated with the call.  */

static void
adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
  if (jfunc->type == IPA_JF_CONST)
    {
      ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
						    cs->lto_stmt_uid);
      if (!to_del)
	return;
      to_del->remove_reference ();
      if (dump_file)
	fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		 cs->caller->dump_name (), symbol->dump_name ());
      return;
    }

  if (jfunc->type != IPA_JF_PASS_THROUGH
      || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
    return;

  int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
  cgraph_node *caller = cs->caller;
  ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
  /* TODO: This consistency check may be too big and not really
     that useful.  Consider removing it.  */
  tree cst;
  if (caller_info->ipcp_orig_node)
    cst = caller_info->known_csts[fidx];
  else
    {
      ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
      gcc_assert (lat->is_single_const ());
      cst = lat->values->value;
    }
  gcc_assert (TREE_CODE (cst) == ADDR_EXPR
	      && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
		  == symbol));

  int cuses = ipa_get_controlled_uses (caller_info, fidx);
  if (cuses == IPA_UNDESCRIBED_USE)
    return;
  gcc_assert (cuses > 0);
  cuses--;
  ipa_set_controlled_uses (caller_info, fidx, cuses);
  if (cuses)
    return;

  if (caller_info->ipcp_orig_node)
    {
      /* Cloning machinery has created a reference here, we need to either
	 remove it or change it to a read one.  */
      ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
      if (to_del && to_del->use == IPA_REF_ADDR)
	{
	  to_del->remove_reference ();
	  if (dump_file)
	    fprintf (dump_file, "    Removed a reference from %s to %s.\n",
		     cs->caller->dump_name (), symbol->dump_name ());
	  if (ipa_get_param_load_dereferenced (caller_info, fidx))
	    {
	      caller->create_reference (symbol, IPA_REF_LOAD, NULL);
	      if (dump_file)
		fprintf (dump_file,
			 "      ...and replaced it with LOAD one.\n");
	    }
	}
    }

  symbol_and_index_together pack;
  pack.symbol = symbol;
  pack.index = fidx;
  if (caller->can_change_signature)
    caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
						&pack, true);
}
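/* An illustrative (hypothetical) scenario for the function above:

     static int v;
     static void f (int *p) { use (*p); }
     void g (void) { f (&v); }

   once IPA-CP substitutes &v directly into a clone of f, the ADDR reference
   from g to v that only existed to pass the argument can be removed, or
   turned into a LOAD reference when the clone still reads v.  The names f, g
   and v are placeholders for exposition only.  */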
/* Return true if we would like to remove a parameter from NODE when cloning it
   with KNOWN_CSTS scalar constants.  */

static bool
want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
{
  auto_vec<bool, 16> surviving;
  bool filled_vec = false;
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      if (!known_csts[i] && ipa_is_param_used (info, i))
	continue;

      if (!filled_vec)
	{
	  clone_info *info = clone_info::get (node);
	  if (!info || !info->param_adjustments)
	    return true;
	  info->param_adjustments->get_surviving_params (&surviving);
	  filled_vec = true;
	}
      if (surviving.length() < (unsigned) i && surviving[i])
	return true;
    }
  return false;
}
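/* For illustration only: when cloning a hypothetical fn (int a, int b) with
   KNOWN_CSTS == {5, NULL}, parameter a becomes removable because its value is
   known, so the predicate above returns true unless a previous clone has
   already dropped it.  */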
/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 struct ipa_agg_replacement_value *aggvals,
			 vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct ipa_agg_replacement_value *av;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
					   ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
	continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
	{
	  tree base = get_base_address (TREE_OPERAND (t, 0));
	  if (TREE_CODE (base) == VAR_DECL
	      && ipa_get_controlled_uses (info, i) == 0
	      && ipa_get_param_load_dereferenced (info, i)
	      && (ref_symbol = symtab_node::get (base)))
	    {
	      load_ref = true;
	      if (node->can_change_signature)
		for (cgraph_edge *caller : callers)
		  adjust_references_in_caller (caller, ref_symbol, i);
	    }
	}

      ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
      if (replace_map)
	vec_safe_push (replace_trees, replace_map);
    }
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (av = aggvals; av; av = av->next)
    new_node->maybe_create_reference (av->value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, "     known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	ipa_dump_agg_replacement_values (dump_file, aggvals);
    }

  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
				  aggvals);

  return new_node;
}
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
			       bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
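/* For illustration, the simple (SIMPLE == true) variant of the predicate
   above matches the argument of the recursive call in

     fn (int i)
     {
       ...
       fn (i);
     }

   because i is forwarded unchanged to the i-th position of an edge whose
   caller and callee resolve to the same, non-cloned function.  */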
/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When
   SIMPLE is true, further check if JFUNC is a simple no-operation
   pass-through.  */

static bool
self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
				   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}
/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> &known_csts,
					    const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

	       fn (int i)
	       {
	         fn (i & 1);
	       }

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      gcc_assert (newval);
	      t = ipa_get_jf_arith_result (
			ipa_get_jf_pass_through_operation (jump_func),
			newval,
			ipa_get_jf_pass_through_operand (jump_func),
			type);
	    }
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else if (!newval)
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}
/* Go through PLATS and create a vector of values consisting of values and
   offsets (minus OFFSET) of lattices that contain only a single value.  */

static vec<ipa_agg_value>
copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
{
  vec<ipa_agg_value> res = vNULL;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value ti;
	ti.offset = aglat->offset - offset;
	ti.value = aglat->values->value;
	res.safe_push (ti);
      }
  return res;
}
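/* For illustration: with OFFSET == 32 and single-constant aggregate lattices
   at offsets 32 and 96, the vector built above contains entries at offsets 0
   and 64, i.e. the lattice offsets rebased by OFFSET.  */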
/* Intersect all values in INTER with single value lattices in PLATS (while
   subtracting OFFSET).  */

static void
intersect_with_plats (class ipcp_param_lattices *plats,
		      vec<ipa_agg_value> *inter,
		      HOST_WIDE_INT offset)
{
  struct ipcp_agg_lattice *aglat;
  struct ipa_agg_value *item;
  int k;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    {
      inter->release ();
      return;
    }

  aglat = plats->aggs;
  FOR_EACH_VEC_ELT (*inter, k, item)
    {
      bool found = false;
      if (!item->value)
	continue;
      while (aglat)
	{
	  if (aglat->offset - offset > item->offset)
	    break;
	  if (aglat->offset - offset == item->offset)
	    {
	      if (aglat->is_single_const ())
		{
		  tree value = aglat->values->value;

		  if (values_equal_for_ipcp_p (item->value, value))
		    found = true;
		}
	      break;
	    }
	  aglat = aglat->next;
	}
      if (!found)
	item->value = NULL_TREE;
    }
}
/* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
   vector result while subtracting OFFSET from the individual value offsets.  */

static vec<ipa_agg_value>
agg_replacements_to_vector (struct cgraph_node *node, int index,
			    HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *av;
  vec<ipa_agg_value> res = vNULL;

  for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
    if (av->index == index
	&& (av->offset - offset) >= 0)
      {
	struct ipa_agg_value item;
	gcc_checking_assert (av->value);
	item.offset = av->offset - offset;
	item.value = av->value;
	res.safe_push (item);
      }

  return res;
}
/* Intersect all values in INTER with those that we have already scheduled to
   be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
   (while subtracting OFFSET).  */

static void
intersect_with_agg_replacements (struct cgraph_node *node, int index,
				 vec<ipa_agg_value> *inter,
				 HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *srcvals;
  struct ipa_agg_value *item;
  int i;

  srcvals = ipa_get_agg_replacements_for_node (node);
  if (!srcvals)
    {
      inter->release ();
      return;
    }

  FOR_EACH_VEC_ELT (*inter, i, item)
    {
      struct ipa_agg_replacement_value *av;
      bool found = false;
      if (!item->value)
	continue;
      for (av = srcvals; av; av = av->next)
	{
	  gcc_checking_assert (av->value);
	  if (av->index == index
	      && av->offset - offset == item->offset)
	    {
	      if (values_equal_for_ipcp_p (item->value, av->value))
		found = true;
	      break;
	    }
	}
      if (!found)
	item->value = NULL_TREE;
    }
}
/* Intersect values in INTER with aggregate values that come along edge CS to
   parameter number INDEX and return it.  If INTER does not actually exist yet,
   copy all incoming values to it.  If we determine we ended up with no values
   whatsoever, return a released vector.  */

static vec<ipa_agg_value>
intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
				vec<ipa_agg_value> inter)
{
  struct ipa_jump_func *jfunc;
  jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  class ipcp_param_lattices *orig_plats;
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
	  if (agg_pass_through_permissible_p (orig_plats, jfunc))
	    {
	      if (!inter.exists ())
		inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
	      else
		intersect_with_agg_replacements (cs->caller, src_idx,
						 &inter, 0);
	      return inter;
	    }
	}
      else
	{
	  class ipcp_param_lattices *src_plats;
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  if (agg_pass_through_permissible_p (src_plats, jfunc))
	    {
	      /* Currently we do not produce clobber aggregate jump
		 functions, adjust when we do.  */
	      gcc_checking_assert (!jfunc->agg.items);
	      if (!inter.exists ())
		inter = copy_plats_to_inter (src_plats, 0);
	      else
		intersect_with_plats (src_plats, &inter, 0);
	      return inter;
	    }
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;
      HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  if (!inter.exists ())
	    inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
	  else
	    intersect_with_agg_replacements (cs->caller, src_idx, &inter,
					     delta);
	}
      else
	{
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  /* Currently we do not produce clobber aggregate jump
	     functions, adjust when we do.  */
	  gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
	  if (!inter.exists ())
	    inter = copy_plats_to_inter (src_plats, delta);
	  else
	    intersect_with_plats (src_plats, &inter, delta);
	}
      return inter;
    }

  if (jfunc->agg.items)
    {
      ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
      struct ipa_agg_value *item;
      int k;

      if (!inter.exists ())
	for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
	  {
	    struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
	    tree value = ipa_agg_value_from_node (caller_info, cs->caller,
						  agg_item);
	    if (value)
	      {
		struct ipa_agg_value agg_value;

		agg_value.value = value;
		agg_value.offset = agg_item->offset;
		inter.safe_push (agg_value);
	      }
	  }
      else
	FOR_EACH_VEC_ELT (inter, k, item)
	  {
	    int l = 0;
	    bool found = false;

	    if (!item->value)
	      continue;

	    while ((unsigned) l < jfunc->agg.items->length ())
	      {
		struct ipa_agg_jf_item *ti;
		ti = &(*jfunc->agg.items)[l];
		if (ti->offset > item->offset)
		  break;
		if (ti->offset == item->offset)
		  {
		    tree value;

		    /* Besides simple pass-through aggregate jump function,
		       arithmetic aggregate jump function could also bring
		       same aggregate value as parameter passed-in for
		       self-feeding recursive call.  For example,

		         fn (int *i)
		         {
		           int j = *i & 1;
		           fn (&j);
		         }

		       Given that *i is 0, recursive propagation via (*i & 1)
		       also gets 0.  */
		    if (self_recursive_agg_pass_through_p (cs, ti, index,
							   false))
		      value = ipa_get_jf_arith_result (
					ti->value.pass_through.operation,
					item->value,
					ti->value.pass_through.operand,
					ti->type);
		    else
		      value = ipa_agg_value_from_node (caller_info,
						       cs->caller, ti);
		    if (value && values_equal_for_ipcp_p (item->value, value))
		      found = true;
		    break;
		  }
		l++;
	      }
	    if (!found)
	      item->value = NULL;
	  }
    }
  else
    {
      inter.release ();
      return vNULL;
    }
  return inter;
}
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  */

static struct ipa_agg_replacement_value *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  struct ipa_agg_replacement_value *res;
  struct ipa_agg_replacement_value **tail = &res;
  struct cgraph_edge *cs;
  int i, j, count = ipa_get_param_count (dest_info);

  FOR_EACH_VEC_ELT (callers, j, cs)
    {
      ipa_edge_args *args = ipa_edge_args_sum->get (cs);
      if (!args)
	{
	  count = 0;
	  break;
	}
      int c = ipa_get_cs_argument_count (args);
      if (c < count)
	count = c;
    }

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      vec<ipa_agg_value> inter = vNULL;
      struct ipa_agg_value *item;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
      int j;

      /* Among other things, the following check should deal with all by_ref
	 mismatches.  */
      if (plats->aggs_bottom)
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jfunc
	    = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
	  if (self_recursive_pass_through_p (cs, jfunc, i)
	      && (!plats->aggs_by_ref
		  || ipa_get_jf_pass_through_agg_preserved (jfunc)))
	    continue;
	  inter = intersect_aggregates_with_edge (cs, i, inter);

	  if (!inter.exists ())
	    goto next_param;
	}

      FOR_EACH_VEC_ELT (inter, j, item)
	{
	  struct ipa_agg_replacement_value *v;

	  if (!item->value)
	    continue;

	  v = ggc_alloc<ipa_agg_replacement_value> ();
	  v->index = i;
	  v->offset = item->offset;
	  v->value = item->value;
	  v->by_ref = plats->aggs_by_ref;
	  *tail = v;
	  tail = &v->next;
	}

    next_param:
      if (inter.exists ())
	inter.release ();
    }
  *tail = NULL;
  return res;
}
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
	continue;

      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}
/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  struct ipa_agg_replacement_value *aggval;
  int i, ec, count;

  aggval = ipa_get_agg_replacements_for_node (node);
  if (!aggval)
    return true;

  ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
  count = ipa_get_param_count (clone_node_info);
  ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
  if (ec < count)
    for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
      if (aggval->index >= ec)
	return false;

  ipa_node_params *orig_node_info
    = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats;
      bool interesting = false;
      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (aggval->index == i)
	  {
	    interesting = true;
	    break;
	  }
      if (!interesting)
	continue;

      plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
      if (plats->aggs_bottom)
	return false;

      vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
      if (!values.exists ())
	return false;

      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (aggval->index == i)
	  {
	    struct ipa_agg_value *item;
	    int j;
	    bool found = false;
	    FOR_EACH_VEC_ELT (values, j, item)
	      if (item->value
		  && item->offset == av->offset
		  && values_equal_for_ipcp_p (item->value, av->value))
		{
		  found = true;
		  break;
		}
	    if (!found)
	      {
		values.release ();
		return false;
	      }
	  }
      values.release ();
    }
  return true;
}
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected to the special node.
   This function can therefore redirect the final edge in a SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}

/* Return a copy of KNOWN_CONTEXTS if it is not empty, otherwise return vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}
/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}
/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  while (aggvals)
    {
      if (aggvals->index == index
	  && aggvals->offset == offset
	  && values_equal_for_ipcp_p (aggvals->value, value))
	return true;
      aggvals = aggvals->next;
    }
  return false;
}

/* Return true if offset is minus one because source of a polymorphic context
   cannot be an aggregate value.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
			       int, HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}
6011 /* Decide whether to create a special version of NODE for value VAL of
6012 parameter at the given INDEX. If OFFSET is -1, the value is for the
6013 parameter itself, otherwise it is stored at the given OFFSET of the
6014 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6015 is a vector which contains clones created for self-recursive calls with an
6016 arithmetic pass-through jump function. */
template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
                    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
                    vec<cgraph_node *> *self_gen_clones)
{
  struct ipa_agg_replacement_value *aggvals;
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "   Ignoring candidate value because "
                 "maximum unit size would be reached with %li.\n",
                 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum,
                                            &caller_count, &rec_count_sum,
                                            &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
         Nevertheless, even if they were it would be only a guesswork which we
         can do now.  The recursive part of the counts can be derived from the
         count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
        {
          unsigned dem = self_gen_clones->length () + 1;
          rec_count_sum = node->count.ipa ().apply_scale (1, dem);
        }
      else
        rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
        fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
                                   freq_sum, count_sum,
                                   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
                                      freq_sum, count_sum,
                                      val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
             node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts,
                                val, index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  aggvals = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
                                                      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
                                            aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
             overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}
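
/* For illustration (a hypothetical example, not actual pass output): given

     static int
     scale (int x, int factor)
     {
       return x * factor;
     }

   where every caller passes factor == 8, the value 8 reaches this function
   as an ipcp_value for parameter 1 with OFFSET -1.  If the cost model above
   approves, a clone such as scale.constprop.0 with factor hard-wired to 8
   is created, allowing later passes to turn the multiplication into a
   shift.  The clone name and the profitability outcome here are only a
   sketch; both depend on the heuristics above.  */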
/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
             node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
          && !avals.m_known_vals[i])
        {
          ipcp_value<tree> *val;
          for (val = lat->values; val; val = val->next)
            ret |= decide_about_value (node, i, -1, val, &avals,
                                       &self_gen_clones);
        }

      if (!plats->aggs_bottom)
        {
          struct ipcp_agg_lattice *aglat;
          ipcp_value<tree> *val;
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            if (!aglat->bottom && aglat->values
                /* If the following is false, the one value has been considered
                   for cloning for all contexts.  */
                && (plats->aggs_contain_variable
                    || !aglat->is_single_const ()))
              for (val = aglat->values; val; val = val->next)
                ret |= decide_about_value (node, i, aglat->offset, val, &avals,
                                           &self_gen_clones);
        }

      if (!ctxlat->bottom
          && avals.m_known_contexts[i].useless_p ())
        {
          ipcp_value<ipa_polymorphic_call_context> *val;
          for (val = ctxlat->values; val; val = val->next)
            ret |= decide_about_value (node, i, -1, val, &avals,
                                       &self_gen_clones);
        }
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
        {
          info->do_clone_for_all_contexts = false;
          return ret;
        }

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
        {
          cgraph_edge *cs = callers[i];
          ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

          if (caller_info && caller_info->node_dead)
            callers.unordered_remove (i);
        }

      if (!adjust_callers_for_value_intersection (callers, node))
        {
          /* If node is not called by anyone, or all its caller edges are
             self-recursive, the node is not really in use, no need to do
             cloning.  */
          info->do_clone_for_all_contexts = false;
          return ret;
        }

      if (dump_file)
        fprintf (dump_file, " - Creating a specialized node of %s "
                 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
        = copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      ipa_agg_replacement_value *aggvals
        = find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
        {
          known_contexts.release ();
          known_contexts = vNULL;
        }
      clone = create_specialized_node (node, known_csts, known_contexts,
                                       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
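
/* For illustration, a hypothetical callee showing the three candidate
   sources walked above:

     struct opts { int level; void *data; };
     static void process (struct opts *o, int n, Base *obj);

   The scalar lattice of N may hold a constant (e.g. all callers pass 4;
   OFFSET is -1), an aggregate lattice of O may hold a constant that every
   caller stores into o->level (OFFSET is that field's position within the
   parameter), and the context lattice of OBJ may hold a known polymorphic
   call context that enables devirtualization.  All three kinds are fed to
   decide_about_value above.  */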
/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
        struct cgraph_node *callee;
        class ipa_node_params *info;

        callee = cs->callee->function_symbol (NULL);
        info = ipa_node_params_sum->get (callee);

        if (info && info->node_dead)
          {
            info->node_dead = 0;
            spread_undeadness (callee);
          }
      }
}
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
                                      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
        && cs->caller->call_for_symbol_thunks_and_aliases
             (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
        ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
        if (!caller_info /* Unoptimized callers are like dead ones.  */
            || !caller_info->node_dead)
          return true;
      }
  return false;
}
/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
        ipa_node_params *info = ipa_node_params_sum->get (v);
        if (info
            && !v->call_for_symbol_thunks_and_aliases
                  (has_undead_caller_from_outside_scc_p, NULL, true))
          info->node_dead = 1;
      }

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
        spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
      if (ipa_node_params_sum->get (v)
          && ipa_node_params_sum->get (v)->node_dead)
        fprintf (dump_file, "  Marking node as dead: %s.\n",
                 v->dump_name ());
}
/* The decision stage.  Iterate over the topological order of call graph nodes
   TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
        {
          struct cgraph_node *v;
          iterate = false;
          for (v = node; v;
               v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
            if (v->has_gimple_body_p ()
                && ipcp_versionable_function_p (v))
              iterate |= decide_whether_version_node (v);

          change |= iterate;
        }
      if (change)
        identify_dead_nodes (node);
    }
}
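
/* Note on the iteration above: decide_whether_version_node returns true
   whenever it creates a clone, and a new clone within an SCC is itself a
   caller that may pass newly known constants to other members of the cycle,
   so the decision loop repeats over the SCC until no further clones are
   deemed profitable.  */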
/* Look up all the bits information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_bits_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool dumped_sth = false;
      bool found_useful_result = false;

      if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
        {
          if (dump_file)
            fprintf (dump_file, "Not considering %s for ipa bitwise "
                     "propagation; -fipa-bit-cp: disabled.\n",
                     node->dump_name ());
          continue;
        }

      if (info->ipcp_orig_node)
        info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
        /* Newly expanded artificial thunks do not have lattices.  */
        continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
        {
          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          if (plats->bits_lattice.constant_p ())
            {
              found_useful_result = true;
              break;
            }
        }

      if (!found_useful_result)
        continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->bits, count);

      for (unsigned i = 0; i < count; i++)
        {
          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          ipa_bits *jfbits;

          if (plats->bits_lattice.constant_p ())
            {
              jfbits
                = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
                                              plats->bits_lattice.get_mask ());
              if (!dbg_cnt (ipa_cp_bits))
                jfbits = NULL;
            }
          else
            jfbits = NULL;

          ts->bits->quick_push (jfbits);
          if (!dump_file || !jfbits)
            continue;
          if (!dumped_sth)
            {
              fprintf (dump_file, "Propagated bits info for function %s:\n",
                       node->dump_name ());
              dumped_sth = true;
            }
          fprintf (dump_file, " param %i: value = ", i);
          print_hex (jfbits->value, dump_file);
          fprintf (dump_file, ", mask = ");
          print_hex (jfbits->mask, dump_file);
          fprintf (dump_file, "\n");
        }
    }
}
/* Look up all VR information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool found_useful_result = false;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
        {
          if (dump_file)
            fprintf (dump_file, "Not considering %s for VR discovery "
                     "and propagation; -fipa-vrp: disabled.\n",
                     node->dump_name ());
          continue;
        }

      if (info->ipcp_orig_node)
        info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (!info->lattices)
        /* Newly expanded artificial thunks do not have lattices.  */
        continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
        {
          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          if (!plats->m_value_range.bottom_p ()
              && !plats->m_value_range.top_p ())
            {
              found_useful_result = true;
              break;
            }
        }
      if (!found_useful_result)
        continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
        {
          ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          ipa_vr vr;

          if (!plats->m_value_range.bottom_p ()
              && !plats->m_value_range.top_p ()
              && dbg_cnt (ipa_cp_vr))
            {
              vr.known = true;
              vr.type = plats->m_value_range.m_vr.kind ();
              vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
              vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
            }
          else
            {
              vr.known = false;
              vr.type = VR_VARYING;
              vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
            }
          ts->m_vr->quick_push (vr);
        }
    }
}
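
/* A worked example (the guard and callee below are hypothetical): if every
   caller passes a value in [0, 15] for some parameter I, its m_value_range
   lattice holds a VR_RANGE [0, 15] and is stored into the transformation
   summary here.  When the body is later compiled, a guard such as

     if (i > 15)
       return handle_overflow ();

   can then be folded away by the intraprocedural range passes.  */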
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
        ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of bits propagation.  */
  ipcp_store_bits_results ();
  /* Store results of value range propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}
/* Initialization and computation of IPCP data structures.  This is the initial
   intraprocedural analysis of functions, which gathers information to be
   propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}
namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
                      ipcp_generate_summary, /* generate_summary */
                      NULL, /* write_summary */
                      NULL, /* read_summary */
                      ipcp_write_transformation_summaries, /*
                      write_optimization_summary */
                      ipcp_read_transformation_summaries, /*
                      read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      ipcp_transform_function, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* FIXME: We should remove the optimize check after we ensure we never
         run IPA passes when not optimizing.  */
      return (flag_ipa_cp && optimize) || in_lto_p;
    }

  virtual unsigned int execute (function *) { return ipcp_driver (); }

}; // class pass_ipa_cp

} // anon namespace
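
/* Note on the gate above: -fipa-cp is enabled by default at -O2 and above,
   so for instance

     gcc -O2 foo.c              // pass runs
     gcc -O2 -fno-ipa-cp foo.c  // pass gated out
     gcc -O0 foo.c              // gated out by the optimize check

   while under LTO (in_lto_p) the pass is entered regardless so that the
   streamed transformation summaries can be applied.  The per-level defaults
   are set in GCC's option-handling code, so treat the -O2 claim as the
   usual configuration rather than a guarantee.  */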
ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}
/* Reset all state within ipa-cp.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_c_finalize (void)
{
  base_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}