1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
105 #include "coretypes.h"
108 #include "gimple-expr.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-iterator.h"
117 #include "gimple-fold.h"
118 #include "symbol-summary.h"
119 #include "tree-vrp.h"
120 #include "ipa-prop.h"
121 #include "tree-pretty-print.h"
122 #include "tree-inline.h"
123 #include "ipa-fnsummary.h"
124 #include "ipa-utils.h"
125 #include "tree-ssa-ccp.h"
126 #include "stringpool.h"
129 #include "symtab-clones.h"
131 template <typename valtype
> class ipcp_value
;
133 /* Describes a particular source for an IPA-CP value. */
135 template <typename valtype
>
136 struct ipcp_value_source
139 /* Aggregate offset of the source, negative if the source is scalar value of
140 the argument itself. */
141 HOST_WIDE_INT offset
;
142 /* The incoming edge that brought the value. */
144 /* If the jump function that resulted into his value was a pass-through or an
145 ancestor, this is the ipcp_value of the caller from which the described
146 value has been derived. Otherwise it is NULL. */
147 ipcp_value
<valtype
> *val
;
148 /* Next pointer in a linked list of sources of a value. */
149 ipcp_value_source
*next
;
150 /* If the jump function that resulted into his value was a pass-through or an
151 ancestor, this is the index of the parameter of the caller the jump
152 function references. */
156 /* Common ancestor for all ipcp_value instantiations. */
158 class ipcp_value_base
161 /* Time benefit and that specializing the function for this value would bring
162 about in this function alone. */
163 sreal local_time_benefit
;
164 /* Time benefit that specializing the function for this value can bring about
166 sreal prop_time_benefit
;
167 /* Size cost that specializing the function for this value would bring about
168 in this function alone. */
170 /* Size cost that specializing the function for this value can bring about in
175 : local_time_benefit (0), prop_time_benefit (0),
176 local_size_cost (0), prop_size_cost (0) {}
179 /* Describes one particular value stored in struct ipcp_lattice. */
181 template <typename valtype
>
182 class ipcp_value
: public ipcp_value_base
185 /* The actual value for the given parameter. */
187 /* The list of sources from which this value originates. */
188 ipcp_value_source
<valtype
> *sources
= nullptr;
189 /* Next pointers in a linked list of all values in a lattice. */
190 ipcp_value
*next
= nullptr;
191 /* Next pointers in a linked list of values in a strongly connected component
193 ipcp_value
*scc_next
= nullptr;
194 /* Next pointers in a linked list of SCCs of values sorted topologically
195 according their sources. */
196 ipcp_value
*topo_next
= nullptr;
197 /* A specialized node created for this value, NULL if none has been (so far)
199 cgraph_node
*spec_node
= nullptr;
200 /* Depth first search number and low link for topological sorting of
204 /* SCC number to identify values which recursively feed into each other.
205 Values in the same SCC have the same SCC number. */
207 /* Non zero if the value is generated from another value in the same lattice
208 for a self-recursive call, the actual number is how many times the
209 operation has been performed. In the unlikely event of the value being
210 present in two chains fo self-recursive value generation chains, it is the
212 unsigned self_recursion_generated_level
= 0;
213 /* True if this value is currently on the topo-sort stack. */
214 bool on_stack
= false;
216 void add_source (cgraph_edge
*cs
, ipcp_value
*src_val
, int src_idx
,
217 HOST_WIDE_INT offset
);
219 /* Return true if both THIS value and O feed into each other. */
221 bool same_scc (const ipcp_value
<valtype
> *o
)
223 return o
->scc_no
== scc_no
;
226 /* Return true, if a this value has been generated for a self-recursive call as
227 a result of an arithmetic pass-through jump-function acting on a value in
228 the same lattice function. */
230 bool self_recursion_generated_p ()
232 return self_recursion_generated_level
> 0;
236 /* Lattice describing potential values of a formal parameter of a function, or
237 a part of an aggregate. TOP is represented by a lattice with zero values
238 and with contains_variable and bottom flags cleared. BOTTOM is represented
239 by a lattice with the bottom flag set. In that case, values and
240 contains_variable flag should be disregarded. */
242 template <typename valtype
>
246 /* The list of known values and types in this lattice. Note that values are
247 not deallocated if a lattice is set to bottom because there may be value
248 sources referencing them. */
249 ipcp_value
<valtype
> *values
;
250 /* Number of known values and types in this lattice. */
252 /* The lattice contains a variable component (in addition to values). */
253 bool contains_variable
;
254 /* The value of the lattice is bottom (i.e. variable and unusable for any
258 inline bool is_single_const ();
259 inline bool set_to_bottom ();
260 inline bool set_contains_variable ();
261 bool add_value (valtype newval
, cgraph_edge
*cs
,
262 ipcp_value
<valtype
> *src_val
= NULL
,
263 int src_idx
= 0, HOST_WIDE_INT offset
= -1,
264 ipcp_value
<valtype
> **val_p
= NULL
,
265 unsigned same_lat_gen_level
= 0);
266 void print (FILE * f
, bool dump_sources
, bool dump_benefits
);
269 /* Lattice of tree values with an offset to describe a part of an
272 struct ipcp_agg_lattice
: public ipcp_lattice
<tree
>
275 /* Offset that is being described by this lattice. */
276 HOST_WIDE_INT offset
;
277 /* Size so that we don't have to re-compute it every time we traverse the
278 list. Must correspond to TYPE_SIZE of all lat values. */
280 /* Next element of the linked list. */
281 struct ipcp_agg_lattice
*next
;
284 /* Lattice of known bits, only capable of holding one value.
285 Bitwise constant propagation propagates which bits of a
301 In the above case, the param 'x' will always have all
302 the bits (except the bits in lsb) set to 0.
303 Hence the mask of 'x' would be 0xff. The mask
304 reflects that the bits in lsb are unknown.
305 The actual propagated value is given by m_value & ~m_mask. */
307 class ipcp_bits_lattice
310 bool bottom_p () const { return m_lattice_val
== IPA_BITS_VARYING
; }
311 bool top_p () const { return m_lattice_val
== IPA_BITS_UNDEFINED
; }
312 bool constant_p () const { return m_lattice_val
== IPA_BITS_CONSTANT
; }
313 bool set_to_bottom ();
314 bool set_to_constant (widest_int
, widest_int
);
315 bool known_nonzero_p () const;
317 widest_int
get_value () const { return m_value
; }
318 widest_int
get_mask () const { return m_mask
; }
320 bool meet_with (ipcp_bits_lattice
& other
, unsigned, signop
,
321 enum tree_code
, tree
, bool);
323 bool meet_with (widest_int
, widest_int
, unsigned);
328 enum { IPA_BITS_UNDEFINED
, IPA_BITS_CONSTANT
, IPA_BITS_VARYING
} m_lattice_val
;
330 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
331 If a bit in mask is set to 0, then the corresponding bit in
332 value is known to be constant. */
333 widest_int m_value
, m_mask
;
335 bool meet_with_1 (widest_int
, widest_int
, unsigned, bool);
336 void get_value_and_mask (tree
, widest_int
*, widest_int
*);
339 /* Lattice of value ranges. */
341 class ipcp_vr_lattice
346 inline bool bottom_p () const;
347 inline bool top_p () const;
348 inline bool set_to_bottom ();
349 bool meet_with (const value_range
*p_vr
);
350 bool meet_with (const ipcp_vr_lattice
&other
);
351 void init () { gcc_assert (m_vr
.undefined_p ()); }
352 void print (FILE * f
);
355 bool meet_with_1 (const value_range
*other_vr
);
358 /* Structure containing lattices for a parameter itself and for pieces of
359 aggregates that are passed in the parameter or by a reference in a parameter
360 plus some other useful flags. */
362 class ipcp_param_lattices
365 /* Lattice describing the value of the parameter itself. */
366 ipcp_lattice
<tree
> itself
;
367 /* Lattice describing the polymorphic contexts of a parameter. */
368 ipcp_lattice
<ipa_polymorphic_call_context
> ctxlat
;
369 /* Lattices describing aggregate parts. */
370 ipcp_agg_lattice
*aggs
;
371 /* Lattice describing known bits. */
372 ipcp_bits_lattice bits_lattice
;
373 /* Lattice describing value range. */
374 ipcp_vr_lattice m_value_range
;
375 /* Number of aggregate lattices */
377 /* True if aggregate data were passed by reference (as opposed to by
380 /* All aggregate lattices contain a variable component (in addition to
382 bool aggs_contain_variable
;
383 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
384 for any propagation). */
387 /* There is a virtual call based on this parameter. */
391 /* Allocation pools for values and their sources in ipa-cp. */
393 object_allocator
<ipcp_value
<tree
> > ipcp_cst_values_pool
394 ("IPA-CP constant values");
396 object_allocator
<ipcp_value
<ipa_polymorphic_call_context
> >
397 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
399 object_allocator
<ipcp_value_source
<tree
> > ipcp_sources_pool
400 ("IPA-CP value sources");
402 object_allocator
<ipcp_agg_lattice
> ipcp_agg_lattice_pool
403 ("IPA_CP aggregate lattices");
405 /* Base count to use in heuristics when using profile feedback. */
407 static profile_count base_count
;
409 /* Original overall size of the program. */
411 static long overall_size
, orig_overall_size
;
413 /* Node name to unique clone suffix number map. */
414 static hash_map
<const char *, unsigned> *clone_num_suffixes
;
416 /* Return the param lattices structure corresponding to the Ith formal
417 parameter of the function described by INFO. */
418 static inline class ipcp_param_lattices
*
419 ipa_get_parm_lattices (class ipa_node_params
*info
, int i
)
421 gcc_assert (i
>= 0 && i
< ipa_get_param_count (info
));
422 gcc_checking_assert (!info
->ipcp_orig_node
);
423 gcc_checking_assert (info
->lattices
);
424 return &(info
->lattices
[i
]);
427 /* Return the lattice corresponding to the scalar value of the Ith formal
428 parameter of the function described by INFO. */
429 static inline ipcp_lattice
<tree
> *
430 ipa_get_scalar_lat (class ipa_node_params
*info
, int i
)
432 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
433 return &plats
->itself
;
436 /* Return the lattice corresponding to the scalar value of the Ith formal
437 parameter of the function described by INFO. */
438 static inline ipcp_lattice
<ipa_polymorphic_call_context
> *
439 ipa_get_poly_ctx_lat (class ipa_node_params
*info
, int i
)
441 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
442 return &plats
->ctxlat
;
445 /* Return whether LAT is a lattice with a single constant and without an
448 template <typename valtype
>
450 ipcp_lattice
<valtype
>::is_single_const ()
452 if (bottom
|| contains_variable
|| values_count
!= 1)
458 /* Print V which is extracted from a value in a lattice to F. */
461 print_ipcp_constant_value (FILE * f
, tree v
)
463 if (TREE_CODE (v
) == ADDR_EXPR
464 && TREE_CODE (TREE_OPERAND (v
, 0)) == CONST_DECL
)
467 print_generic_expr (f
, DECL_INITIAL (TREE_OPERAND (v
, 0)));
470 print_generic_expr (f
, v
);
473 /* Print V which is extracted from a value in a lattice to F. */
476 print_ipcp_constant_value (FILE * f
, ipa_polymorphic_call_context v
)
481 /* Print a lattice LAT to F. */
483 template <typename valtype
>
485 ipcp_lattice
<valtype
>::print (FILE * f
, bool dump_sources
, bool dump_benefits
)
487 ipcp_value
<valtype
> *val
;
492 fprintf (f
, "BOTTOM\n");
496 if (!values_count
&& !contains_variable
)
498 fprintf (f
, "TOP\n");
502 if (contains_variable
)
504 fprintf (f
, "VARIABLE");
510 for (val
= values
; val
; val
= val
->next
)
512 if (dump_benefits
&& prev
)
514 else if (!dump_benefits
&& prev
)
519 print_ipcp_constant_value (f
, val
->value
);
523 ipcp_value_source
<valtype
> *s
;
525 if (val
->self_recursion_generated_p ())
526 fprintf (f
, " [self_gen(%i), from:",
527 val
->self_recursion_generated_level
);
529 fprintf (f
, " [scc: %i, from:", val
->scc_no
);
530 for (s
= val
->sources
; s
; s
= s
->next
)
531 fprintf (f
, " %i(%f)", s
->cs
->caller
->order
,
532 s
->cs
->sreal_frequency ().to_double ());
537 fprintf (f
, " [loc_time: %g, loc_size: %i, "
538 "prop_time: %g, prop_size: %i]\n",
539 val
->local_time_benefit
.to_double (), val
->local_size_cost
,
540 val
->prop_time_benefit
.to_double (), val
->prop_size_cost
);
547 ipcp_bits_lattice::print (FILE *f
)
550 fprintf (f
, " Bits unknown (TOP)\n");
551 else if (bottom_p ())
552 fprintf (f
, " Bits unusable (BOTTOM)\n");
555 fprintf (f
, " Bits: value = "); print_hex (get_value (), f
);
556 fprintf (f
, ", mask = "); print_hex (get_mask (), f
);
561 /* Print value range lattice to F. */
564 ipcp_vr_lattice::print (FILE * f
)
566 dump_value_range (f
, &m_vr
);
569 /* Print all ipcp_lattices of all functions to F. */
572 print_all_lattices (FILE * f
, bool dump_sources
, bool dump_benefits
)
574 struct cgraph_node
*node
;
577 fprintf (f
, "\nLattices:\n");
578 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
580 class ipa_node_params
*info
;
582 info
= ipa_node_params_sum
->get (node
);
583 /* Skip unoptimized functions and constprop clones since we don't make
584 lattices for them. */
585 if (!info
|| info
->ipcp_orig_node
)
587 fprintf (f
, " Node: %s:\n", node
->dump_name ());
588 count
= ipa_get_param_count (info
);
589 for (i
= 0; i
< count
; i
++)
591 struct ipcp_agg_lattice
*aglat
;
592 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
593 fprintf (f
, " param [%d]: ", i
);
594 plats
->itself
.print (f
, dump_sources
, dump_benefits
);
595 fprintf (f
, " ctxs: ");
596 plats
->ctxlat
.print (f
, dump_sources
, dump_benefits
);
597 plats
->bits_lattice
.print (f
);
599 plats
->m_value_range
.print (f
);
601 if (plats
->virt_call
)
602 fprintf (f
, " virt_call flag set\n");
604 if (plats
->aggs_bottom
)
606 fprintf (f
, " AGGS BOTTOM\n");
609 if (plats
->aggs_contain_variable
)
610 fprintf (f
, " AGGS VARIABLE\n");
611 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
613 fprintf (f
, " %soffset " HOST_WIDE_INT_PRINT_DEC
": ",
614 plats
->aggs_by_ref
? "ref " : "", aglat
->offset
);
615 aglat
->print (f
, dump_sources
, dump_benefits
);
621 /* Determine whether it is at all technically possible to create clones of NODE
622 and store this information in the ipa_node_params structure associated
626 determine_versionability (struct cgraph_node
*node
,
627 class ipa_node_params
*info
)
629 const char *reason
= NULL
;
631 /* There are a number of generic reasons functions cannot be versioned. We
632 also cannot remove parameters if there are type attributes such as fnspec
634 if (node
->alias
|| node
->thunk
)
635 reason
= "alias or thunk";
636 else if (!node
->versionable
)
637 reason
= "not a tree_versionable_function";
638 else if (node
->get_availability () <= AVAIL_INTERPOSABLE
)
639 reason
= "insufficient body availability";
640 else if (!opt_for_fn (node
->decl
, optimize
)
641 || !opt_for_fn (node
->decl
, flag_ipa_cp
))
642 reason
= "non-optimized function";
643 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node
->decl
)))
645 /* Ideally we should clone the SIMD clones themselves and create
646 vector copies of them, so IPA-cp and SIMD clones can happily
647 coexist, but that may not be worth the effort. */
648 reason
= "function has SIMD clones";
650 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node
->decl
)))
652 /* Ideally we should clone the target clones themselves and create
653 copies of them, so IPA-cp and target clones can happily
654 coexist, but that may not be worth the effort. */
655 reason
= "function target_clones attribute";
657 /* Don't clone decls local to a comdat group; it breaks and for C++
658 decloned constructors, inlining is always better anyway. */
659 else if (node
->comdat_local_p ())
660 reason
= "comdat-local function";
661 else if (node
->calls_comdat_local
)
663 /* TODO: call is versionable if we make sure that all
664 callers are inside of a comdat group. */
665 reason
= "calls comdat-local function";
668 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
669 work only when inlined. Cloning them may still lead to better code
670 because ipa-cp will not give up on cloning further. If the function is
671 external this however leads to wrong code because we may end up producing
672 offline copy of the function. */
673 if (DECL_EXTERNAL (node
->decl
))
674 for (cgraph_edge
*edge
= node
->callees
; !reason
&& edge
;
675 edge
= edge
->next_callee
)
676 if (fndecl_built_in_p (edge
->callee
->decl
, BUILT_IN_NORMAL
))
678 if (DECL_FUNCTION_CODE (edge
->callee
->decl
) == BUILT_IN_VA_ARG_PACK
)
679 reason
= "external function which calls va_arg_pack";
680 if (DECL_FUNCTION_CODE (edge
->callee
->decl
)
681 == BUILT_IN_VA_ARG_PACK_LEN
)
682 reason
= "external function which calls va_arg_pack_len";
685 if (reason
&& dump_file
&& !node
->alias
&& !node
->thunk
)
686 fprintf (dump_file
, "Function %s is not versionable, reason: %s.\n",
687 node
->dump_name (), reason
);
689 info
->versionable
= (reason
== NULL
);
692 /* Return true if it is at all technically possible to create clones of a
696 ipcp_versionable_function_p (struct cgraph_node
*node
)
698 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
699 return info
&& info
->versionable
;
702 /* Structure holding accumulated information about callers of a node. */
704 struct caller_statistics
706 /* If requested (see below), self-recursive call counts are summed into this
708 profile_count rec_count_sum
;
709 /* The sum of all ipa counts of all the other (non-recursive) calls. */
710 profile_count count_sum
;
711 /* Sum of all frequencies for all calls. */
713 /* Number of calls and hot calls respectively. */
714 int n_calls
, n_hot_calls
;
715 /* If itself is set up, also count the number of non-self-recursive
718 /* If non-NULL, this is the node itself and calls from it should have their
719 counts included in rec_count_sum and not count_sum. */
723 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
724 from IGNORED_CALLER are not counted. */
727 init_caller_stats (caller_statistics
*stats
, cgraph_node
*itself
= NULL
)
729 stats
->rec_count_sum
= profile_count::zero ();
730 stats
->count_sum
= profile_count::zero ();
732 stats
->n_hot_calls
= 0;
733 stats
->n_nonrec_calls
= 0;
735 stats
->itself
= itself
;
738 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
739 non-thunk incoming edges to NODE. */
742 gather_caller_stats (struct cgraph_node
*node
, void *data
)
744 struct caller_statistics
*stats
= (struct caller_statistics
*) data
;
745 struct cgraph_edge
*cs
;
747 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
748 if (!cs
->caller
->thunk
)
750 ipa_node_params
*info
= ipa_node_params_sum
->get (cs
->caller
);
751 if (info
&& info
->node_dead
)
754 if (cs
->count
.ipa ().initialized_p ())
756 if (stats
->itself
&& stats
->itself
== cs
->caller
)
757 stats
->rec_count_sum
+= cs
->count
.ipa ();
759 stats
->count_sum
+= cs
->count
.ipa ();
761 stats
->freq_sum
+= cs
->sreal_frequency ();
763 if (stats
->itself
&& stats
->itself
!= cs
->caller
)
764 stats
->n_nonrec_calls
++;
766 if (cs
->maybe_hot_p ())
767 stats
->n_hot_calls
++;
773 /* Return true if this NODE is viable candidate for cloning. */
776 ipcp_cloning_candidate_p (struct cgraph_node
*node
)
778 struct caller_statistics stats
;
780 gcc_checking_assert (node
->has_gimple_body_p ());
782 if (!opt_for_fn (node
->decl
, flag_ipa_cp_clone
))
785 fprintf (dump_file
, "Not considering %s for cloning; "
786 "-fipa-cp-clone disabled.\n",
791 if (node
->optimize_for_size_p ())
794 fprintf (dump_file
, "Not considering %s for cloning; "
795 "optimizing it for size.\n",
800 init_caller_stats (&stats
);
801 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
, false);
803 if (ipa_size_summaries
->get (node
)->self_size
< stats
.n_calls
)
806 fprintf (dump_file
, "Considering %s for cloning; code might shrink.\n",
811 /* When profile is available and function is hot, propagate into it even if
812 calls seems cold; constant propagation can improve function's speed
814 if (stats
.count_sum
> profile_count::zero ()
815 && node
->count
.ipa ().initialized_p ())
817 if (stats
.count_sum
> node
->count
.ipa ().apply_scale (90, 100))
820 fprintf (dump_file
, "Considering %s for cloning; "
821 "usually called directly.\n",
826 if (!stats
.n_hot_calls
)
829 fprintf (dump_file
, "Not considering %s for cloning; no hot calls.\n",
834 fprintf (dump_file
, "Considering %s for cloning.\n",
839 template <typename valtype
>
840 class value_topo_info
843 /* Head of the linked list of topologically sorted values. */
844 ipcp_value
<valtype
> *values_topo
;
845 /* Stack for creating SCCs, represented by a linked list too. */
846 ipcp_value
<valtype
> *stack
;
847 /* Counter driving the algorithm in add_val_to_toposort. */
850 value_topo_info () : values_topo (NULL
), stack (NULL
), dfs_counter (0)
852 void add_val (ipcp_value
<valtype
> *cur_val
);
853 void propagate_effects ();
856 /* Arrays representing a topological ordering of call graph nodes and a stack
857 of nodes used during constant propagation and also data required to perform
858 topological sort of values and propagation of benefits in the determined
864 /* Array with obtained topological order of cgraph nodes. */
865 struct cgraph_node
**order
;
866 /* Stack of cgraph nodes used during propagation within SCC until all values
867 in the SCC stabilize. */
868 struct cgraph_node
**stack
;
869 int nnodes
, stack_top
;
871 value_topo_info
<tree
> constants
;
872 value_topo_info
<ipa_polymorphic_call_context
> contexts
;
874 ipa_topo_info () : order(NULL
), stack(NULL
), nnodes(0), stack_top(0),
879 /* Skip edges from and to nodes without ipa_cp enabled.
880 Ignore not available symbols. */
883 ignore_edge_p (cgraph_edge
*e
)
885 enum availability avail
;
886 cgraph_node
*ultimate_target
887 = e
->callee
->function_or_virtual_thunk_symbol (&avail
, e
->caller
);
889 return (avail
<= AVAIL_INTERPOSABLE
890 || !opt_for_fn (ultimate_target
->decl
, optimize
)
891 || !opt_for_fn (ultimate_target
->decl
, flag_ipa_cp
));
894 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
897 build_toporder_info (class ipa_topo_info
*topo
)
899 topo
->order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
900 topo
->stack
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
902 gcc_checking_assert (topo
->stack_top
== 0);
903 topo
->nnodes
= ipa_reduced_postorder (topo
->order
, true,
907 /* Free information about strongly connected components and the arrays in
911 free_toporder_info (class ipa_topo_info
*topo
)
913 ipa_free_postorder_info ();
918 /* Add NODE to the stack in TOPO, unless it is already there. */
921 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
923 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
924 if (info
->node_enqueued
)
926 info
->node_enqueued
= 1;
927 topo
->stack
[topo
->stack_top
++] = node
;
930 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
933 static struct cgraph_node
*
934 pop_node_from_stack (class ipa_topo_info
*topo
)
938 struct cgraph_node
*node
;
940 node
= topo
->stack
[topo
->stack_top
];
941 ipa_node_params_sum
->get (node
)->node_enqueued
= 0;
948 /* Set lattice LAT to bottom and return true if it previously was not set as
951 template <typename valtype
>
953 ipcp_lattice
<valtype
>::set_to_bottom ()
960 /* Mark lattice as containing an unknown value and return true if it previously
961 was not marked as such. */
963 template <typename valtype
>
965 ipcp_lattice
<valtype
>::set_contains_variable ()
967 bool ret
= !contains_variable
;
968 contains_variable
= true;
972 /* Set all aggregate lattices in PLATS to bottom and return true if they were
973 not previously set as such. */
976 set_agg_lats_to_bottom (class ipcp_param_lattices
*plats
)
978 bool ret
= !plats
->aggs_bottom
;
979 plats
->aggs_bottom
= true;
983 /* Mark all aggregate lattices in PLATS as containing an unknown value and
984 return true if they were not previously marked as such. */
987 set_agg_lats_contain_variable (class ipcp_param_lattices
*plats
)
989 bool ret
= !plats
->aggs_contain_variable
;
990 plats
->aggs_contain_variable
= true;
995 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice
&other
)
997 return meet_with_1 (&other
.m_vr
);
1000 /* Meet the current value of the lattice with value range described by VR
1004 ipcp_vr_lattice::meet_with (const value_range
*p_vr
)
1006 return meet_with_1 (p_vr
);
1009 /* Meet the current value of the lattice with value range described by
1010 OTHER_VR lattice. Return TRUE if anything changed. */
1013 ipcp_vr_lattice::meet_with_1 (const value_range
*other_vr
)
1018 if (other_vr
->varying_p ())
1019 return set_to_bottom ();
1021 value_range
save (m_vr
);
1022 m_vr
.union_ (*other_vr
);
1023 return m_vr
!= save
;
1026 /* Return true if value range information in the lattice is yet unknown. */
1029 ipcp_vr_lattice::top_p () const
1031 return m_vr
.undefined_p ();
1034 /* Return true if value range information in the lattice is known to be
1038 ipcp_vr_lattice::bottom_p () const
1040 return m_vr
.varying_p ();
1043 /* Set value range information in the lattice to bottom. Return true if it
1044 previously was in a different state. */
1047 ipcp_vr_lattice::set_to_bottom ()
1049 if (m_vr
.varying_p ())
1051 /* ?? We create all sorts of VARYING ranges for floats, structures,
1052 and other types which we cannot handle as ranges. We should
1053 probably avoid handling them throughout the pass, but it's easier
1054 to create a sensible VARYING here and let the lattice
1056 m_vr
.set_varying (integer_type_node
);
1060 /* Set lattice value to bottom, if it already isn't the case. */
1063 ipcp_bits_lattice::set_to_bottom ()
1067 m_lattice_val
= IPA_BITS_VARYING
;
1073 /* Set to constant if it isn't already. Only meant to be called
1074 when switching state from TOP. */
1077 ipcp_bits_lattice::set_to_constant (widest_int value
, widest_int mask
)
1079 gcc_assert (top_p ());
1080 m_lattice_val
= IPA_BITS_CONSTANT
;
1081 m_value
= wi::bit_and (wi::bit_not (mask
), value
);
1086 /* Return true if any of the known bits are non-zero. */
1089 ipcp_bits_lattice::known_nonzero_p () const
1093 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask
), m_value
), 0);
1096 /* Convert operand to value, mask form. */
1099 ipcp_bits_lattice::get_value_and_mask (tree operand
, widest_int
*valuep
, widest_int
*maskp
)
1101 wide_int
get_nonzero_bits (const_tree
);
1103 if (TREE_CODE (operand
) == INTEGER_CST
)
1105 *valuep
= wi::to_widest (operand
);
1115 /* Meet operation, similar to ccp_lattice_meet, we xor values
1116 if this->value, value have different values at same bit positions, we want
1117 to drop that bit to varying. Return true if mask is changed.
1118 This function assumes that the lattice value is in CONSTANT state. If
1119 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1122 ipcp_bits_lattice::meet_with_1 (widest_int value
, widest_int mask
,
1123 unsigned precision
, bool drop_all_ones
)
1125 gcc_assert (constant_p ());
1127 widest_int old_mask
= m_mask
;
1128 m_mask
= (m_mask
| mask
) | (m_value
^ value
);
1133 if (wi::sext (m_mask
, precision
) == -1)
1134 return set_to_bottom ();
1136 return m_mask
!= old_mask
;
1139 /* Meet the bits lattice with operand
1140 described by <value, mask, sgn, precision. */
1143 ipcp_bits_lattice::meet_with (widest_int value
, widest_int mask
,
1151 if (wi::sext (mask
, precision
) == -1)
1152 return set_to_bottom ();
1153 return set_to_constant (value
, mask
);
1156 return meet_with_1 (value
, mask
, precision
, false);
1159 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1160 if code is binary operation or bit_value_unop (other) if code is unary op.
1161 In the case when code is nop_expr, no adjustment is required. If
1162 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1165 ipcp_bits_lattice::meet_with (ipcp_bits_lattice
& other
, unsigned precision
,
1166 signop sgn
, enum tree_code code
, tree operand
,
1169 if (other
.bottom_p ())
1170 return set_to_bottom ();
1172 if (bottom_p () || other
.top_p ())
1175 widest_int adjusted_value
, adjusted_mask
;
1177 if (TREE_CODE_CLASS (code
) == tcc_binary
)
1179 tree type
= TREE_TYPE (operand
);
1180 widest_int o_value
, o_mask
;
1181 get_value_and_mask (operand
, &o_value
, &o_mask
);
1183 bit_value_binop (code
, sgn
, precision
, &adjusted_value
, &adjusted_mask
,
1184 sgn
, precision
, other
.get_value (), other
.get_mask (),
1185 TYPE_SIGN (type
), TYPE_PRECISION (type
), o_value
, o_mask
);
1187 if (wi::sext (adjusted_mask
, precision
) == -1)
1188 return set_to_bottom ();
1191 else if (TREE_CODE_CLASS (code
) == tcc_unary
)
1193 bit_value_unop (code
, sgn
, precision
, &adjusted_value
,
1194 &adjusted_mask
, sgn
, precision
, other
.get_value (),
1197 if (wi::sext (adjusted_mask
, precision
) == -1)
1198 return set_to_bottom ();
1202 return set_to_bottom ();
1208 adjusted_mask
|= adjusted_value
;
1209 adjusted_value
&= ~adjusted_mask
;
1211 if (wi::sext (adjusted_mask
, precision
) == -1)
1212 return set_to_bottom ();
1213 return set_to_constant (adjusted_value
, adjusted_mask
);
1216 return meet_with_1 (adjusted_value
, adjusted_mask
, precision
,
1220 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1221 return true is any of them has not been marked as such so far. */
1224 set_all_contains_variable (class ipcp_param_lattices
*plats
)
1227 ret
= plats
->itself
.set_contains_variable ();
1228 ret
|= plats
->ctxlat
.set_contains_variable ();
1229 ret
|= set_agg_lats_contain_variable (plats
);
1230 ret
|= plats
->bits_lattice
.set_to_bottom ();
1231 ret
|= plats
->m_value_range
.set_to_bottom ();
1235 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1236 points to by the number of callers to NODE. */
1239 count_callers (cgraph_node
*node
, void *data
)
1241 int *caller_count
= (int *) data
;
1243 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
1244 /* Local thunks can be handled transparently, but if the thunk cannot
1245 be optimized out, count it as a real use. */
1246 if (!cs
->caller
->thunk
|| !cs
->caller
->local
)
1251 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1252 the one caller of some other node. Set the caller's corresponding flag. */
1255 set_single_call_flag (cgraph_node
*node
, void *)
1257 cgraph_edge
*cs
= node
->callers
;
1258 /* Local thunks can be handled transparently, skip them. */
1259 while (cs
&& cs
->caller
->thunk
&& cs
->caller
->local
)
1260 cs
= cs
->next_caller
;
1262 if (ipa_node_params
* info
= ipa_node_params_sum
->get (cs
->caller
))
1264 info
->node_calling_single_call
= true;
1270 /* Initialize ipcp_lattices. */
1273 initialize_node_lattices (struct cgraph_node
*node
)
1275 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1276 struct cgraph_edge
*ie
;
1277 bool disable
= false, variable
= false;
1280 gcc_checking_assert (node
->has_gimple_body_p ());
1282 if (!ipa_get_param_count (info
))
1284 else if (node
->local
)
1286 int caller_count
= 0;
1287 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1289 gcc_checking_assert (caller_count
> 0);
1290 if (caller_count
== 1)
1291 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1296 /* When cloning is allowed, we can assume that externally visible
1297 functions are not called. We will compensate this by cloning
1299 if (ipcp_versionable_function_p (node
)
1300 && ipcp_cloning_candidate_p (node
))
1306 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1307 && !node
->alias
&& !node
->thunk
)
1309 fprintf (dump_file
, "Initializing lattices of %s\n",
1310 node
->dump_name ());
1311 if (disable
|| variable
)
1312 fprintf (dump_file
, " Marking all lattices as %s\n",
1313 disable
? "BOTTOM" : "VARIABLE");
1316 auto_vec
<bool, 16> surviving_params
;
1317 bool pre_modified
= false;
1319 clone_info
*cinfo
= clone_info::get (node
);
1321 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1323 /* At the moment all IPA optimizations should use the number of
1324 parameters of the prevailing decl as the m_always_copy_start.
1325 Handling any other value would complicate the code below, so for the
1326 time bing let's only assert it is so. */
1327 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1328 == ipa_get_param_count (info
))
1329 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1331 pre_modified
= true;
1332 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1334 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1335 && !node
->alias
&& !node
->thunk
)
1338 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1340 if (j
< (int) surviving_params
.length ()
1341 && surviving_params
[j
])
1346 " The following parameters are dead on arrival:");
1349 fprintf (dump_file
, " %u", j
);
1352 fprintf (dump_file
, "\n");
1356 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1358 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1360 || !ipa_get_type (info
, i
)
1361 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1362 || !surviving_params
[i
])))
1364 plats
->itself
.set_to_bottom ();
1365 plats
->ctxlat
.set_to_bottom ();
1366 set_agg_lats_to_bottom (plats
);
1367 plats
->bits_lattice
.set_to_bottom ();
1368 plats
->m_value_range
.m_vr
= value_range ();
1369 plats
->m_value_range
.set_to_bottom ();
1373 plats
->m_value_range
.init ();
1375 set_all_contains_variable (plats
);
1379 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1380 if (ie
->indirect_info
->polymorphic
1381 && ie
->indirect_info
->param_index
>= 0)
1383 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1384 ipa_get_parm_lattices (info
,
1385 ie
->indirect_info
->param_index
)->virt_call
= 1;
1389 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1393 ipacp_value_safe_for_type (tree param_type
, tree value
)
1395 tree val_type
= TREE_TYPE (value
);
1396 if (param_type
== val_type
1397 || useless_type_conversion_p (param_type
, val_type
)
1398 || fold_convertible_p (param_type
, value
))
1404 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1407 values_equal_for_ipcp_p (tree x
, tree y
)
1409 gcc_checking_assert (x
!= NULL_TREE
&& y
!= NULL_TREE
);
1414 if (TREE_CODE (x
) == ADDR_EXPR
1415 && TREE_CODE (y
) == ADDR_EXPR
1416 && TREE_CODE (TREE_OPERAND (x
, 0)) == CONST_DECL
1417 && TREE_CODE (TREE_OPERAND (y
, 0)) == CONST_DECL
)
1418 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x
, 0)),
1419 DECL_INITIAL (TREE_OPERAND (y
, 0)), 0);
1421 return operand_equal_p (x
, y
, 0);
1424 /* Return the result of a (possibly arithmetic) operation on the constant
1425 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1426 the type of the parameter to which the result is passed. Return
1427 NULL_TREE if that cannot be determined or be considered an
1428 interprocedural invariant. */
1431 ipa_get_jf_arith_result (enum tree_code opcode
, tree input
, tree operand
,
1436 if (opcode
== NOP_EXPR
)
1438 if (!is_gimple_ip_invariant (input
))
1441 if (opcode
== ASSERT_EXPR
)
1443 if (values_equal_for_ipcp_p (input
, operand
))
1451 if (TREE_CODE_CLASS (opcode
) == tcc_comparison
)
1452 res_type
= boolean_type_node
;
1453 else if (expr_type_first_operand_type_p (opcode
))
1454 res_type
= TREE_TYPE (input
);
1459 if (TREE_CODE_CLASS (opcode
) == tcc_unary
)
1460 res
= fold_unary (opcode
, res_type
, input
);
1462 res
= fold_binary (opcode
, res_type
, input
, operand
);
1464 if (res
&& !is_gimple_ip_invariant (res
))
1470 /* Return the result of a (possibly arithmetic) pass through jump function
1471 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1472 to which the result is passed. Return NULL_TREE if that cannot be
1473 determined or be considered an interprocedural invariant. */
1476 ipa_get_jf_pass_through_result (struct ipa_jump_func
*jfunc
, tree input
,
1479 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc
),
1481 ipa_get_jf_pass_through_operand (jfunc
),
1485 /* Return the result of an ancestor jump function JFUNC on the constant value
1486 INPUT. Return NULL_TREE if that cannot be determined. */
1489 ipa_get_jf_ancestor_result (struct ipa_jump_func
*jfunc
, tree input
)
1491 gcc_checking_assert (TREE_CODE (input
) != TREE_BINFO
);
1492 if (TREE_CODE (input
) == ADDR_EXPR
)
1494 gcc_checking_assert (is_gimple_ip_invariant_address (input
));
1495 poly_int64 off
= ipa_get_jf_ancestor_offset (jfunc
);
1496 if (known_eq (off
, 0))
1498 poly_int64 byte_offset
= exact_div (off
, BITS_PER_UNIT
);
1499 return build1 (ADDR_EXPR
, TREE_TYPE (input
),
1500 fold_build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (input
)), input
,
1501 build_int_cst (ptr_type_node
, byte_offset
)));
1503 else if (ipa_get_jf_ancestor_keep_null (jfunc
)
1510 /* Determine whether JFUNC evaluates to a single known constant value and if
1511 so, return it. Otherwise return NULL. INFO describes the caller node or
1512 the one it is inlined to, so that pass-through jump functions can be
1513 evaluated. PARM_TYPE is the type of the parameter to which the result is
1517 ipa_value_from_jfunc (class ipa_node_params
*info
, struct ipa_jump_func
*jfunc
,
1520 if (jfunc
->type
== IPA_JF_CONST
)
1521 return ipa_get_jf_constant (jfunc
);
1522 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
1523 || jfunc
->type
== IPA_JF_ANCESTOR
)
1528 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1529 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1531 idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1533 if (info
->ipcp_orig_node
)
1534 input
= info
->known_csts
[idx
];
1537 ipcp_lattice
<tree
> *lat
;
1540 || idx
>= ipa_get_param_count (info
))
1542 lat
= ipa_get_scalar_lat (info
, idx
);
1543 if (!lat
->is_single_const ())
1545 input
= lat
->values
->value
;
1551 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1552 return ipa_get_jf_pass_through_result (jfunc
, input
, parm_type
);
1554 return ipa_get_jf_ancestor_result (jfunc
, input
);
1560 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1561 that INFO describes the caller node or the one it is inlined to, CS is the
1562 call graph edge corresponding to JFUNC and CSIDX index of the described
1565 ipa_polymorphic_call_context
1566 ipa_context_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
, int csidx
,
1567 ipa_jump_func
*jfunc
)
1569 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
1570 ipa_polymorphic_call_context ctx
;
1571 ipa_polymorphic_call_context
*edge_ctx
1572 = cs
? ipa_get_ith_polymorhic_call_context (args
, csidx
) : NULL
;
1574 if (edge_ctx
&& !edge_ctx
->useless_p ())
1577 if (jfunc
->type
== IPA_JF_PASS_THROUGH
1578 || jfunc
->type
== IPA_JF_ANCESTOR
)
1580 ipa_polymorphic_call_context srcctx
;
1582 bool type_preserved
= true;
1583 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1585 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
1587 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
1588 srcidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1592 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
1593 srcidx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1595 if (info
->ipcp_orig_node
)
1597 if (info
->known_contexts
.exists ())
1598 srcctx
= info
->known_contexts
[srcidx
];
1603 || srcidx
>= ipa_get_param_count (info
))
1605 ipcp_lattice
<ipa_polymorphic_call_context
> *lat
;
1606 lat
= ipa_get_poly_ctx_lat (info
, srcidx
);
1607 if (!lat
->is_single_const ())
1609 srcctx
= lat
->values
->value
;
1611 if (srcctx
.useless_p ())
1613 if (jfunc
->type
== IPA_JF_ANCESTOR
)
1614 srcctx
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
1615 if (!type_preserved
)
1616 srcctx
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
1617 srcctx
.combine_with (ctx
);
1624 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1625 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1626 the result is a range or an anti-range. */
1629 ipa_vr_operation_and_type_effects (value_range
*dst_vr
,
1630 value_range
*src_vr
,
1631 enum tree_code operation
,
1632 tree dst_type
, tree src_type
)
1634 range_fold_unary_expr (dst_vr
, operation
, dst_type
, src_vr
, src_type
);
1635 if (dst_vr
->varying_p () || dst_vr
->undefined_p ())
1640 /* Determine value_range of JFUNC given that INFO describes the caller node or
1641 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1642 and PARM_TYPE of the parameter. */
1645 ipa_value_range_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
,
1646 ipa_jump_func
*jfunc
, tree parm_type
)
1650 ipa_vr_operation_and_type_effects (&vr
,
1652 NOP_EXPR
, parm_type
,
1653 jfunc
->m_vr
->type ());
1654 if (vr
.singleton_p ())
1656 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1659 ipcp_transformation
*sum
1660 = ipcp_get_transformation_summary (cs
->caller
->inlined_to
1661 ? cs
->caller
->inlined_to
1663 if (!sum
|| !sum
->m_vr
)
1666 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1668 if (!(*sum
->m_vr
)[idx
].known
)
1670 tree vr_type
= ipa_get_type (info
, idx
);
1671 value_range
srcvr (wide_int_to_tree (vr_type
, (*sum
->m_vr
)[idx
].min
),
1672 wide_int_to_tree (vr_type
, (*sum
->m_vr
)[idx
].max
),
1673 (*sum
->m_vr
)[idx
].type
);
1675 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
1677 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
1681 if (ipa_vr_operation_and_type_effects (&res
,
1683 operation
, parm_type
,
1689 value_range op_res
, res
;
1690 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
1691 value_range
op_vr (op
, op
);
1693 range_fold_binary_expr (&op_res
, operation
, vr_type
, &srcvr
, &op_vr
);
1694 if (ipa_vr_operation_and_type_effects (&res
,
1696 NOP_EXPR
, parm_type
,
1704 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1705 parameter with the given INDEX. */
1708 get_clone_agg_value (struct cgraph_node
*node
, HOST_WIDE_INT offset
,
1711 struct ipa_agg_replacement_value
*aggval
;
1713 aggval
= ipa_get_agg_replacements_for_node (node
);
1716 if (aggval
->offset
== offset
1717 && aggval
->index
== index
)
1718 return aggval
->value
;
1719 aggval
= aggval
->next
;
1724 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1725 single known constant value and if so, return it. Otherwise return NULL.
1726 NODE and INFO describes the caller node or the one it is inlined to, and
1727 its related info. */
1730 ipa_agg_value_from_node (class ipa_node_params
*info
,
1731 struct cgraph_node
*node
,
1732 struct ipa_agg_jf_item
*item
)
1734 tree value
= NULL_TREE
;
1737 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
1740 if (item
->jftype
== IPA_JF_CONST
)
1741 return item
->value
.constant
;
1743 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
1744 || item
->jftype
== IPA_JF_LOAD_AGG
);
1746 src_idx
= item
->value
.pass_through
.formal_id
;
1748 if (info
->ipcp_orig_node
)
1750 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1751 value
= info
->known_csts
[src_idx
];
1753 value
= get_clone_agg_value (node
, item
->value
.load_agg
.offset
,
1756 else if (info
->lattices
)
1758 class ipcp_param_lattices
*src_plats
1759 = ipa_get_parm_lattices (info
, src_idx
);
1761 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1763 struct ipcp_lattice
<tree
> *lat
= &src_plats
->itself
;
1765 if (!lat
->is_single_const ())
1768 value
= lat
->values
->value
;
1770 else if (src_plats
->aggs
1771 && !src_plats
->aggs_bottom
1772 && !src_plats
->aggs_contain_variable
1773 && src_plats
->aggs_by_ref
== item
->value
.load_agg
.by_ref
)
1775 struct ipcp_agg_lattice
*aglat
;
1777 for (aglat
= src_plats
->aggs
; aglat
; aglat
= aglat
->next
)
1779 if (aglat
->offset
> item
->value
.load_agg
.offset
)
1782 if (aglat
->offset
== item
->value
.load_agg
.offset
)
1784 if (aglat
->is_single_const ())
1785 value
= aglat
->values
->value
;
1795 if (item
->jftype
== IPA_JF_LOAD_AGG
)
1797 tree load_type
= item
->value
.load_agg
.type
;
1798 tree value_type
= TREE_TYPE (value
);
1800 /* Ensure value type is compatible with load type. */
1801 if (!useless_type_conversion_p (load_type
, value_type
))
1805 return ipa_get_jf_arith_result (item
->value
.pass_through
.operation
,
1807 item
->value
.pass_through
.operand
,
1811 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1812 an aggregate and if so, return it. Otherwise return an empty set. NODE
1813 and INFO describes the caller node or the one it is inlined to, and its
1816 struct ipa_agg_value_set
1817 ipa_agg_value_set_from_jfunc (class ipa_node_params
*info
, cgraph_node
*node
,
1818 struct ipa_agg_jump_function
*agg_jfunc
)
1820 struct ipa_agg_value_set agg
;
1821 struct ipa_agg_jf_item
*item
;
1825 agg
.by_ref
= agg_jfunc
->by_ref
;
1827 FOR_EACH_VEC_SAFE_ELT (agg_jfunc
->items
, i
, item
)
1829 tree value
= ipa_agg_value_from_node (info
, node
, item
);
1833 struct ipa_agg_value value_item
;
1835 value_item
.offset
= item
->offset
;
1836 value_item
.value
= value
;
1838 agg
.items
.safe_push (value_item
);
1844 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1845 bottom, not containing a variable component and without any known value at
1849 ipcp_verify_propagated_values (void)
1851 struct cgraph_node
*node
;
1853 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
1855 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1856 if (!opt_for_fn (node
->decl
, flag_ipa_cp
)
1857 || !opt_for_fn (node
->decl
, optimize
))
1859 int i
, count
= ipa_get_param_count (info
);
1861 for (i
= 0; i
< count
; i
++)
1863 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (info
, i
);
1866 && !lat
->contains_variable
1867 && lat
->values_count
== 0)
1871 symtab
->dump (dump_file
);
1872 fprintf (dump_file
, "\nIPA lattices after constant "
1873 "propagation, before gcc_unreachable:\n");
1874 print_all_lattices (dump_file
, true, false);
1883 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1886 values_equal_for_ipcp_p (ipa_polymorphic_call_context x
,
1887 ipa_polymorphic_call_context y
)
1889 return x
.equal_to (y
);
1893 /* Add a new value source to the value represented by THIS, marking that a
1894 value comes from edge CS and (if the underlying jump function is a
1895 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1896 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1897 scalar value of the parameter itself or the offset within an aggregate. */
1899 template <typename valtype
>
1901 ipcp_value
<valtype
>::add_source (cgraph_edge
*cs
, ipcp_value
*src_val
,
1902 int src_idx
, HOST_WIDE_INT offset
)
1904 ipcp_value_source
<valtype
> *src
;
1906 src
= new (ipcp_sources_pool
.allocate ()) ipcp_value_source
<valtype
>;
1907 src
->offset
= offset
;
1910 src
->index
= src_idx
;
1912 src
->next
= sources
;
1916 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1917 SOURCE and clear all other fields. */
1919 static ipcp_value
<tree
> *
1920 allocate_and_init_ipcp_value (tree cst
, unsigned same_lat_gen_level
)
1922 ipcp_value
<tree
> *val
;
1924 val
= new (ipcp_cst_values_pool
.allocate ()) ipcp_value
<tree
>();
1926 val
->self_recursion_generated_level
= same_lat_gen_level
;
1930 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1931 value to SOURCE and clear all other fields. */
1933 static ipcp_value
<ipa_polymorphic_call_context
> *
1934 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx
,
1935 unsigned same_lat_gen_level
)
1937 ipcp_value
<ipa_polymorphic_call_context
> *val
;
1939 val
= new (ipcp_poly_ctx_values_pool
.allocate ())
1940 ipcp_value
<ipa_polymorphic_call_context
>();
1942 val
->self_recursion_generated_level
= same_lat_gen_level
;
1946 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1947 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1948 meaning. OFFSET -1 means the source is scalar and not a part of an
1949 aggregate. If non-NULL, VAL_P records address of existing or newly added
1952 If the value is generated for a self-recursive call as a result of an
1953 arithmetic pass-through jump-function acting on a value in the same lattice,
1954 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1955 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1957 template <typename valtype
>
1959 ipcp_lattice
<valtype
>::add_value (valtype newval
, cgraph_edge
*cs
,
1960 ipcp_value
<valtype
> *src_val
,
1961 int src_idx
, HOST_WIDE_INT offset
,
1962 ipcp_value
<valtype
> **val_p
,
1963 unsigned same_lat_gen_level
)
1965 ipcp_value
<valtype
> *val
, *last_val
= NULL
;
1973 for (val
= values
; val
; last_val
= val
, val
= val
->next
)
1974 if (values_equal_for_ipcp_p (val
->value
, newval
))
1979 if (val
->self_recursion_generated_level
< same_lat_gen_level
)
1980 val
->self_recursion_generated_level
= same_lat_gen_level
;
1982 if (ipa_edge_within_scc (cs
))
1984 ipcp_value_source
<valtype
> *s
;
1985 for (s
= val
->sources
; s
; s
= s
->next
)
1986 if (s
->cs
== cs
&& s
->val
== src_val
)
1992 val
->add_source (cs
, src_val
, src_idx
, offset
);
1996 if (!same_lat_gen_level
&& values_count
== opt_for_fn (cs
->caller
->decl
,
1997 param_ipa_cp_value_list_size
))
1999 /* We can only free sources, not the values themselves, because sources
2000 of other values in this SCC might point to them. */
2001 for (val
= values
; val
; val
= val
->next
)
2003 while (val
->sources
)
2005 ipcp_value_source
<valtype
> *src
= val
->sources
;
2006 val
->sources
= src
->next
;
2007 ipcp_sources_pool
.remove ((ipcp_value_source
<tree
>*)src
);
2011 return set_to_bottom ();
2015 val
= allocate_and_init_ipcp_value (newval
, same_lat_gen_level
);
2016 val
->add_source (cs
, src_val
, src_idx
, offset
);
2019 /* Add the new value to end of value list, which can reduce iterations
2020 of propagation stage for recursive function. */
2022 last_val
->next
= val
;
2032 /* A helper function that returns result of operation specified by OPCODE on
2033 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2034 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2035 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2039 get_val_across_arith_op (enum tree_code opcode
,
2042 ipcp_value
<tree
> *src_val
,
2045 tree opnd1
= src_val
->value
;
2047 /* Skip source values that is incompatible with specified type. */
2049 && !useless_type_conversion_p (opnd1_type
, TREE_TYPE (opnd1
)))
2052 return ipa_get_jf_arith_result (opcode
, opnd1
, opnd2
, res_type
);
2055 /* Propagate values through an arithmetic transformation described by a jump
2056 function associated with edge CS, taking values from SRC_LAT and putting
2057 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2058 OPND2 is a constant value if transformation is a binary operation.
2059 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2060 a part of the aggregate. SRC_IDX is the index of the source parameter.
2061 RES_TYPE is the value type of result being propagated into. Return true if
2062 DEST_LAT changed. */
2065 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2066 enum tree_code opcode
,
2069 ipcp_lattice
<tree
> *src_lat
,
2070 ipcp_lattice
<tree
> *dest_lat
,
2071 HOST_WIDE_INT src_offset
,
2075 ipcp_value
<tree
> *src_val
;
2078 /* Due to circular dependencies, propagating within an SCC through arithmetic
2079 transformation would create infinite number of values. But for
2080 self-feeding recursive function, we could allow propagation in a limited
2081 count, and this can enable a simple kind of recursive function versioning.
2082 For other scenario, we would just make lattices bottom. */
2083 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2087 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2088 param_ipa_cp_max_recursive_depth
);
2089 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2090 return dest_lat
->set_contains_variable ();
2092 /* No benefit if recursive execution is in low probability. */
2093 if (cs
->sreal_frequency () * 100
2094 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2095 param_ipa_cp_min_recursive_probability
))
2096 return dest_lat
->set_contains_variable ();
2098 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
2100 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2102 /* Now we do not use self-recursively generated value as propagation
2103 source, this is absolutely conservative, but could avoid explosion
2104 of lattice's value space, especially when one recursive function
2105 calls another recursive. */
2106 if (src_val
->self_recursion_generated_p ())
2108 ipcp_value_source
<tree
> *s
;
2110 /* If the lattice has already been propagated for the call site,
2111 no need to do that again. */
2112 for (s
= src_val
->sources
; s
; s
= s
->next
)
2114 return dest_lat
->set_contains_variable ();
2117 val_seeds
.safe_push (src_val
);
2120 gcc_assert ((int) val_seeds
.length () <= param_ipa_cp_value_list_size
);
2122 /* Recursively generate lattice values with a limited count. */
2123 FOR_EACH_VEC_ELT (val_seeds
, i
, src_val
)
2125 for (int j
= 1; j
< max_recursive_depth
; j
++)
2127 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2130 || !ipacp_value_safe_for_type (res_type
, cstval
))
2133 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2134 src_offset
, &src_val
, j
);
2135 gcc_checking_assert (src_val
);
2138 ret
|= dest_lat
->set_contains_variable ();
2141 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2143 /* Now we do not use self-recursively generated value as propagation
2144 source, otherwise it is easy to make value space of normal lattice
2146 if (src_val
->self_recursion_generated_p ())
2148 ret
|= dest_lat
->set_contains_variable ();
2152 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2155 && ipacp_value_safe_for_type (res_type
, cstval
))
2156 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2159 ret
|= dest_lat
->set_contains_variable ();
2165 /* Propagate values through a pass-through jump function JFUNC associated with
2166 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2167 is the index of the source parameter. PARM_TYPE is the type of the
2168 parameter to which the result is passed. */
2171 propagate_vals_across_pass_through (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2172 ipcp_lattice
<tree
> *src_lat
,
2173 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2176 return propagate_vals_across_arith_jfunc (cs
,
2177 ipa_get_jf_pass_through_operation (jfunc
),
2179 ipa_get_jf_pass_through_operand (jfunc
),
2180 src_lat
, dest_lat
, -1, src_idx
, parm_type
);
2183 /* Propagate values through an ancestor jump function JFUNC associated with
2184 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2185 is the index of the source parameter. */
2188 propagate_vals_across_ancestor (struct cgraph_edge
*cs
,
2189 struct ipa_jump_func
*jfunc
,
2190 ipcp_lattice
<tree
> *src_lat
,
2191 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2194 ipcp_value
<tree
> *src_val
;
2197 if (ipa_edge_within_scc (cs
))
2198 return dest_lat
->set_contains_variable ();
2200 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2202 tree t
= ipa_get_jf_ancestor_result (jfunc
, src_val
->value
);
2204 if (t
&& ipacp_value_safe_for_type (param_type
, t
))
2205 ret
|= dest_lat
->add_value (t
, cs
, src_val
, src_idx
);
2207 ret
|= dest_lat
->set_contains_variable ();
2213 /* Propagate scalar values across jump function JFUNC that is associated with
2214 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2215 parameter to which the result is passed. */
2218 propagate_scalar_across_jump_function (struct cgraph_edge
*cs
,
2219 struct ipa_jump_func
*jfunc
,
2220 ipcp_lattice
<tree
> *dest_lat
,
2223 if (dest_lat
->bottom
)
2226 if (jfunc
->type
== IPA_JF_CONST
)
2228 tree val
= ipa_get_jf_constant (jfunc
);
2229 if (ipacp_value_safe_for_type (param_type
, val
))
2230 return dest_lat
->add_value (val
, cs
, NULL
, 0);
2232 return dest_lat
->set_contains_variable ();
2234 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
2235 || jfunc
->type
== IPA_JF_ANCESTOR
)
2237 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2238 ipcp_lattice
<tree
> *src_lat
;
2242 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2243 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2245 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2247 src_lat
= ipa_get_scalar_lat (caller_info
, src_idx
);
2248 if (src_lat
->bottom
)
2249 return dest_lat
->set_contains_variable ();
2251 /* If we would need to clone the caller and cannot, do not propagate. */
2252 if (!ipcp_versionable_function_p (cs
->caller
)
2253 && (src_lat
->contains_variable
2254 || (src_lat
->values_count
> 1)))
2255 return dest_lat
->set_contains_variable ();
2257 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2258 ret
= propagate_vals_across_pass_through (cs
, jfunc
, src_lat
,
2262 ret
= propagate_vals_across_ancestor (cs
, jfunc
, src_lat
, dest_lat
,
2263 src_idx
, param_type
);
2265 if (src_lat
->contains_variable
)
2266 ret
|= dest_lat
->set_contains_variable ();
2271 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2272 use it for indirect inlining), we should propagate them too. */
2273 return dest_lat
->set_contains_variable ();
2276 /* Propagate scalar values across jump function JFUNC that is associated with
2277 edge CS and describes argument IDX and put the values into DEST_LAT. */
2280 propagate_context_across_jump_function (cgraph_edge
*cs
,
2281 ipa_jump_func
*jfunc
, int idx
,
2282 ipcp_lattice
<ipa_polymorphic_call_context
> *dest_lat
)
2284 if (dest_lat
->bottom
)
2286 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
2288 bool added_sth
= false;
2289 bool type_preserved
= true;
2291 ipa_polymorphic_call_context edge_ctx
, *edge_ctx_ptr
2292 = ipa_get_ith_polymorhic_call_context (args
, idx
);
2295 edge_ctx
= *edge_ctx_ptr
;
2297 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2298 || jfunc
->type
== IPA_JF_ANCESTOR
)
2300 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2302 ipcp_lattice
<ipa_polymorphic_call_context
> *src_lat
;
2304 /* TODO: Once we figure out how to propagate speculations, it will
2305 probably be a good idea to switch to speculation if type_preserved is
2306 not set instead of punting. */
2307 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2309 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
2311 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
2312 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2316 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
2317 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2320 src_lat
= ipa_get_poly_ctx_lat (caller_info
, src_idx
);
2321 /* If we would need to clone the caller and cannot, do not propagate. */
2322 if (!ipcp_versionable_function_p (cs
->caller
)
2323 && (src_lat
->contains_variable
2324 || (src_lat
->values_count
> 1)))
2327 ipcp_value
<ipa_polymorphic_call_context
> *src_val
;
2328 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2330 ipa_polymorphic_call_context cur
= src_val
->value
;
2332 if (!type_preserved
)
2333 cur
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
2334 if (jfunc
->type
== IPA_JF_ANCESTOR
)
2335 cur
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
2336 /* TODO: In cases we know how the context is going to be used,
2337 we can improve the result by passing proper OTR_TYPE. */
2338 cur
.combine_with (edge_ctx
);
2339 if (!cur
.useless_p ())
2341 if (src_lat
->contains_variable
2342 && !edge_ctx
.equal_to (cur
))
2343 ret
|= dest_lat
->set_contains_variable ();
2344 ret
|= dest_lat
->add_value (cur
, cs
, src_val
, src_idx
);
2353 if (!edge_ctx
.useless_p ())
2354 ret
|= dest_lat
->add_value (edge_ctx
, cs
);
2356 ret
|= dest_lat
->set_contains_variable ();
2362 /* Propagate bits across jfunc that is associated with
2363 edge cs and update dest_lattice accordingly. */
2366 propagate_bits_across_jump_function (cgraph_edge
*cs
, int idx
,
2367 ipa_jump_func
*jfunc
,
2368 ipcp_bits_lattice
*dest_lattice
)
2370 if (dest_lattice
->bottom_p ())
2373 enum availability availability
;
2374 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
2375 ipa_node_params
*callee_info
= ipa_node_params_sum
->get (callee
);
2376 tree parm_type
= ipa_get_type (callee_info
, idx
);
2378 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2379 transform for these cases. Similarly, we can have bad type mismatches
2380 with LTO, avoid doing anything with those too. */
2382 || (!INTEGRAL_TYPE_P (parm_type
) && !POINTER_TYPE_P (parm_type
)))
2384 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2385 fprintf (dump_file
, "Setting dest_lattice to bottom, because type of "
2386 "param %i of %s is NULL or unsuitable for bits propagation\n",
2387 idx
, cs
->callee
->dump_name ());
2389 return dest_lattice
->set_to_bottom ();
2392 unsigned precision
= TYPE_PRECISION (parm_type
);
2393 signop sgn
= TYPE_SIGN (parm_type
);
2395 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2396 || jfunc
->type
== IPA_JF_ANCESTOR
)
2398 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2399 tree operand
= NULL_TREE
;
2400 enum tree_code code
;
2402 bool keep_null
= false;
2404 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2406 code
= ipa_get_jf_pass_through_operation (jfunc
);
2407 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2408 if (code
!= NOP_EXPR
)
2409 operand
= ipa_get_jf_pass_through_operand (jfunc
);
2413 code
= POINTER_PLUS_EXPR
;
2414 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2415 unsigned HOST_WIDE_INT offset
2416 = ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
2417 keep_null
= (ipa_get_jf_ancestor_keep_null (jfunc
) || !offset
);
2418 operand
= build_int_cstu (size_type_node
, offset
);
2421 class ipcp_param_lattices
*src_lats
2422 = ipa_get_parm_lattices (caller_info
, src_idx
);
2424 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2430 Assume lattice for x is bottom, however we can still propagate
2431 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2432 and we store it in jump function during analysis stage. */
2434 if (!src_lats
->bits_lattice
.bottom_p ())
2437 = keep_null
&& !src_lats
->bits_lattice
.known_nonzero_p ();
2439 return dest_lattice
->meet_with (src_lats
->bits_lattice
, precision
,
2440 sgn
, code
, operand
, drop_all_ones
);
2445 return dest_lattice
->meet_with (jfunc
->bits
->value
, jfunc
->bits
->mask
,
2448 return dest_lattice
->set_to_bottom ();
2451 /* Propagate value range across jump function JFUNC that is associated with
2452 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2456 propagate_vr_across_jump_function (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2457 class ipcp_param_lattices
*dest_plats
,
2460 ipcp_vr_lattice
*dest_lat
= &dest_plats
->m_value_range
;
2462 if (dest_lat
->bottom_p ())
2466 || (!INTEGRAL_TYPE_P (param_type
)
2467 && !POINTER_TYPE_P (param_type
)))
2468 return dest_lat
->set_to_bottom ();
2470 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2472 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
2473 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2474 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2475 class ipcp_param_lattices
*src_lats
2476 = ipa_get_parm_lattices (caller_info
, src_idx
);
2477 tree operand_type
= ipa_get_type (caller_info
, src_idx
);
2479 if (src_lats
->m_value_range
.bottom_p ())
2480 return dest_lat
->set_to_bottom ();
2483 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
2484 ipa_vr_operation_and_type_effects (&vr
,
2485 &src_lats
->m_value_range
.m_vr
,
2486 operation
, param_type
,
2488 /* A crude way to prevent unbounded number of value range updates
2489 in SCC components. We should allow limited number of updates within
2491 else if (!ipa_edge_within_scc (cs
))
2493 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
2494 value_range
op_vr (op
, op
);
2495 value_range op_res
,res
;
2497 range_fold_binary_expr (&op_res
, operation
, operand_type
,
2498 &src_lats
->m_value_range
.m_vr
, &op_vr
);
2499 ipa_vr_operation_and_type_effects (&vr
,
2501 NOP_EXPR
, param_type
,
2504 if (!vr
.undefined_p () && !vr
.varying_p ())
2509 if (ipa_vr_operation_and_type_effects (&jvr
, jfunc
->m_vr
,
2512 jfunc
->m_vr
->type ()))
2515 return dest_lat
->meet_with (&vr
);
2518 else if (jfunc
->type
== IPA_JF_CONST
)
2520 tree val
= ipa_get_jf_constant (jfunc
);
2521 if (TREE_CODE (val
) == INTEGER_CST
)
2523 val
= fold_convert (param_type
, val
);
2524 if (TREE_OVERFLOW_P (val
))
2525 val
= drop_tree_overflow (val
);
2527 value_range
tmpvr (val
, val
);
2528 return dest_lat
->meet_with (&tmpvr
);
2534 && ipa_vr_operation_and_type_effects (&vr
, jfunc
->m_vr
, NOP_EXPR
,
2536 jfunc
->m_vr
->type ()))
2537 return dest_lat
->meet_with (&vr
);
2539 return dest_lat
->set_to_bottom ();
2542 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2543 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2544 other cases, return false). If there are no aggregate items, set
2545 aggs_by_ref to NEW_AGGS_BY_REF. */
2548 set_check_aggs_by_ref (class ipcp_param_lattices
*dest_plats
,
2549 bool new_aggs_by_ref
)
2551 if (dest_plats
->aggs
)
2553 if (dest_plats
->aggs_by_ref
!= new_aggs_by_ref
)
2555 set_agg_lats_to_bottom (dest_plats
);
2560 dest_plats
->aggs_by_ref
= new_aggs_by_ref
;
2564 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2565 already existing lattice for the given OFFSET and SIZE, marking all skipped
2566 lattices as containing variable and checking for overlaps. If there is no
2567 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2568 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2569 unless there are too many already. If there are two many, return false. If
2570 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2571 skipped lattices were newly marked as containing variable, set *CHANGE to
2572 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2575 merge_agg_lats_step (class ipcp_param_lattices
*dest_plats
,
2576 HOST_WIDE_INT offset
, HOST_WIDE_INT val_size
,
2577 struct ipcp_agg_lattice
***aglat
,
2578 bool pre_existing
, bool *change
, int max_agg_items
)
2580 gcc_checking_assert (offset
>= 0);
2582 while (**aglat
&& (**aglat
)->offset
< offset
)
2584 if ((**aglat
)->offset
+ (**aglat
)->size
> offset
)
2586 set_agg_lats_to_bottom (dest_plats
);
2589 *change
|= (**aglat
)->set_contains_variable ();
2590 *aglat
= &(**aglat
)->next
;
2593 if (**aglat
&& (**aglat
)->offset
== offset
)
2595 if ((**aglat
)->size
!= val_size
)
2597 set_agg_lats_to_bottom (dest_plats
);
2600 gcc_assert (!(**aglat
)->next
2601 || (**aglat
)->next
->offset
>= offset
+ val_size
);
2606 struct ipcp_agg_lattice
*new_al
;
2608 if (**aglat
&& (**aglat
)->offset
< offset
+ val_size
)
2610 set_agg_lats_to_bottom (dest_plats
);
2613 if (dest_plats
->aggs_count
== max_agg_items
)
2615 dest_plats
->aggs_count
++;
2616 new_al
= ipcp_agg_lattice_pool
.allocate ();
2617 memset (new_al
, 0, sizeof (*new_al
));
2619 new_al
->offset
= offset
;
2620 new_al
->size
= val_size
;
2621 new_al
->contains_variable
= pre_existing
;
2623 new_al
->next
= **aglat
;
2629 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2630 containing an unknown value. */
2633 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice
*aglat
)
2638 ret
|= aglat
->set_contains_variable ();
2639 aglat
= aglat
->next
;
2644 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2645 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2646 parameter used for lattice value sources. Return true if DEST_PLATS changed
2650 merge_aggregate_lattices (struct cgraph_edge
*cs
,
2651 class ipcp_param_lattices
*dest_plats
,
2652 class ipcp_param_lattices
*src_plats
,
2653 int src_idx
, HOST_WIDE_INT offset_delta
)
2655 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2656 struct ipcp_agg_lattice
**dst_aglat
;
2659 if (set_check_aggs_by_ref (dest_plats
, src_plats
->aggs_by_ref
))
2661 if (src_plats
->aggs_bottom
)
2662 return set_agg_lats_contain_variable (dest_plats
);
2663 if (src_plats
->aggs_contain_variable
)
2664 ret
|= set_agg_lats_contain_variable (dest_plats
);
2665 dst_aglat
= &dest_plats
->aggs
;
2667 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2668 param_ipa_max_agg_items
);
2669 for (struct ipcp_agg_lattice
*src_aglat
= src_plats
->aggs
;
2671 src_aglat
= src_aglat
->next
)
2673 HOST_WIDE_INT new_offset
= src_aglat
->offset
- offset_delta
;
2677 if (merge_agg_lats_step (dest_plats
, new_offset
, src_aglat
->size
,
2678 &dst_aglat
, pre_existing
, &ret
, max_agg_items
))
2680 struct ipcp_agg_lattice
*new_al
= *dst_aglat
;
2682 dst_aglat
= &(*dst_aglat
)->next
;
2683 if (src_aglat
->bottom
)
2685 ret
|= new_al
->set_contains_variable ();
2688 if (src_aglat
->contains_variable
)
2689 ret
|= new_al
->set_contains_variable ();
2690 for (ipcp_value
<tree
> *val
= src_aglat
->values
;
2693 ret
|= new_al
->add_value (val
->value
, cs
, val
, src_idx
,
2696 else if (dest_plats
->aggs_bottom
)
2699 ret
|= set_chain_of_aglats_contains_variable (*dst_aglat
);
2703 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2704 pass-through JFUNC and if so, whether it has conform and conforms to the
2705 rules about propagating values passed by reference. */
2708 agg_pass_through_permissible_p (class ipcp_param_lattices
*src_plats
,
2709 struct ipa_jump_func
*jfunc
)
2711 return src_plats
->aggs
2712 && (!src_plats
->aggs_by_ref
2713 || ipa_get_jf_pass_through_agg_preserved (jfunc
));
2716 /* Propagate values through ITEM, jump function for a part of an aggregate,
2717 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2718 associated with the jump function. Return true if AGLAT changed in any
2722 propagate_aggregate_lattice (struct cgraph_edge
*cs
,
2723 struct ipa_agg_jf_item
*item
,
2724 struct ipcp_agg_lattice
*aglat
)
2726 class ipa_node_params
*caller_info
;
2727 class ipcp_param_lattices
*src_plats
;
2728 struct ipcp_lattice
<tree
> *src_lat
;
2729 HOST_WIDE_INT src_offset
;
2734 if (item
->jftype
== IPA_JF_CONST
)
2736 tree value
= item
->value
.constant
;
2738 gcc_checking_assert (is_gimple_ip_invariant (value
));
2739 return aglat
->add_value (value
, cs
, NULL
, 0);
2742 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
2743 || item
->jftype
== IPA_JF_LOAD_AGG
);
2745 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2746 src_idx
= item
->value
.pass_through
.formal_id
;
2747 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2749 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2751 load_type
= NULL_TREE
;
2752 src_lat
= &src_plats
->itself
;
2757 HOST_WIDE_INT load_offset
= item
->value
.load_agg
.offset
;
2758 struct ipcp_agg_lattice
*src_aglat
;
2760 for (src_aglat
= src_plats
->aggs
; src_aglat
; src_aglat
= src_aglat
->next
)
2761 if (src_aglat
->offset
>= load_offset
)
2764 load_type
= item
->value
.load_agg
.type
;
2766 || src_aglat
->offset
> load_offset
2767 || src_aglat
->size
!= tree_to_shwi (TYPE_SIZE (load_type
))
2768 || src_plats
->aggs_by_ref
!= item
->value
.load_agg
.by_ref
)
2769 return aglat
->set_contains_variable ();
2771 src_lat
= src_aglat
;
2772 src_offset
= load_offset
;
2776 || (!ipcp_versionable_function_p (cs
->caller
)
2777 && !src_lat
->is_single_const ()))
2778 return aglat
->set_contains_variable ();
2780 ret
= propagate_vals_across_arith_jfunc (cs
,
2781 item
->value
.pass_through
.operation
,
2783 item
->value
.pass_through
.operand
,
2789 if (src_lat
->contains_variable
)
2790 ret
|= aglat
->set_contains_variable ();
2795 /* Propagate scalar values across jump function JFUNC that is associated with
2796 edge CS and put the values into DEST_LAT. */
2799 propagate_aggs_across_jump_function (struct cgraph_edge
*cs
,
2800 struct ipa_jump_func
*jfunc
,
2801 class ipcp_param_lattices
*dest_plats
)
2805 if (dest_plats
->aggs_bottom
)
2808 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2809 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
2811 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2812 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2813 class ipcp_param_lattices
*src_plats
;
2815 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2816 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
2818 /* Currently we do not produce clobber aggregate jump
2819 functions, replace with merging when we do. */
2820 gcc_assert (!jfunc
->agg
.items
);
2821 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
,
2826 else if (jfunc
->type
== IPA_JF_ANCESTOR
2827 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
2829 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2830 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2831 class ipcp_param_lattices
*src_plats
;
2833 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2834 if (src_plats
->aggs
&& src_plats
->aggs_by_ref
)
2836 /* Currently we do not produce clobber aggregate jump
2837 functions, replace with merging when we do. */
2838 gcc_assert (!jfunc
->agg
.items
);
2839 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
, src_idx
,
2840 ipa_get_jf_ancestor_offset (jfunc
));
2842 else if (!src_plats
->aggs_by_ref
)
2843 ret
|= set_agg_lats_to_bottom (dest_plats
);
2845 ret
|= set_agg_lats_contain_variable (dest_plats
);
2849 if (jfunc
->agg
.items
)
2851 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2852 struct ipcp_agg_lattice
**aglat
= &dest_plats
->aggs
;
2853 struct ipa_agg_jf_item
*item
;
2856 if (set_check_aggs_by_ref (dest_plats
, jfunc
->agg
.by_ref
))
2859 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2860 param_ipa_max_agg_items
);
2861 FOR_EACH_VEC_ELT (*jfunc
->agg
.items
, i
, item
)
2863 HOST_WIDE_INT val_size
;
2865 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
2867 val_size
= tree_to_shwi (TYPE_SIZE (item
->type
));
2869 if (merge_agg_lats_step (dest_plats
, item
->offset
, val_size
,
2870 &aglat
, pre_existing
, &ret
, max_agg_items
))
2872 ret
|= propagate_aggregate_lattice (cs
, item
, *aglat
);
2873 aglat
= &(*aglat
)->next
;
2875 else if (dest_plats
->aggs_bottom
)
2879 ret
|= set_chain_of_aglats_contains_variable (*aglat
);
2882 ret
|= set_agg_lats_contain_variable (dest_plats
);
2887 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2888 non-thunk) destination, the call passes through a thunk. */
2891 call_passes_through_thunk (cgraph_edge
*cs
)
2893 cgraph_node
*alias_or_thunk
= cs
->callee
;
2894 while (alias_or_thunk
->alias
)
2895 alias_or_thunk
= alias_or_thunk
->get_alias_target ();
2896 return alias_or_thunk
->thunk
;
2899 /* Propagate constants from the caller to the callee of CS. INFO describes the
2903 propagate_constants_across_call (struct cgraph_edge
*cs
)
2905 class ipa_node_params
*callee_info
;
2906 enum availability availability
;
2907 cgraph_node
*callee
;
2908 class ipa_edge_args
*args
;
2910 int i
, args_count
, parms_count
;
2912 callee
= cs
->callee
->function_symbol (&availability
);
2913 if (!callee
->definition
)
2915 gcc_checking_assert (callee
->has_gimple_body_p ());
2916 callee_info
= ipa_node_params_sum
->get (callee
);
2920 args
= ipa_edge_args_sum
->get (cs
);
2921 parms_count
= ipa_get_param_count (callee_info
);
2922 if (parms_count
== 0)
2925 || !opt_for_fn (cs
->caller
->decl
, flag_ipa_cp
)
2926 || !opt_for_fn (cs
->caller
->decl
, optimize
))
2928 for (i
= 0; i
< parms_count
; i
++)
2929 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
2933 args_count
= ipa_get_cs_argument_count (args
);
2935 /* If this call goes through a thunk we must not propagate to the first (0th)
2936 parameter. However, we might need to uncover a thunk from below a series
2937 of aliases first. */
2938 if (call_passes_through_thunk (cs
))
2940 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
2947 for (; (i
< args_count
) && (i
< parms_count
); i
++)
2949 struct ipa_jump_func
*jump_func
= ipa_get_ith_jump_func (args
, i
);
2950 class ipcp_param_lattices
*dest_plats
;
2951 tree param_type
= ipa_get_type (callee_info
, i
);
2953 dest_plats
= ipa_get_parm_lattices (callee_info
, i
);
2954 if (availability
== AVAIL_INTERPOSABLE
)
2955 ret
|= set_all_contains_variable (dest_plats
);
2958 ret
|= propagate_scalar_across_jump_function (cs
, jump_func
,
2959 &dest_plats
->itself
,
2961 ret
|= propagate_context_across_jump_function (cs
, jump_func
, i
,
2962 &dest_plats
->ctxlat
);
2964 |= propagate_bits_across_jump_function (cs
, i
, jump_func
,
2965 &dest_plats
->bits_lattice
);
2966 ret
|= propagate_aggs_across_jump_function (cs
, jump_func
,
2968 if (opt_for_fn (callee
->decl
, flag_ipa_vrp
))
2969 ret
|= propagate_vr_across_jump_function (cs
, jump_func
,
2970 dest_plats
, param_type
);
2972 ret
|= dest_plats
->m_value_range
.set_to_bottom ();
2975 for (; i
< parms_count
; i
++)
2976 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
, i
));
2981 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2982 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter
2983 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2986 ipa_get_indirect_edge_target_1 (struct cgraph_edge
*ie
,
2987 const vec
<tree
> &known_csts
,
2988 const vec
<ipa_polymorphic_call_context
> &known_contexts
,
2989 const vec
<ipa_agg_value_set
> &known_aggs
,
2990 struct ipa_agg_replacement_value
*agg_reps
,
2993 int param_index
= ie
->indirect_info
->param_index
;
2994 HOST_WIDE_INT anc_offset
;
2998 *speculative
= false;
3000 if (param_index
== -1)
3003 if (!ie
->indirect_info
->polymorphic
)
3007 if (ie
->indirect_info
->agg_contents
)
3010 if (agg_reps
&& ie
->indirect_info
->guaranteed_unmodified
)
3014 if (agg_reps
->index
== param_index
3015 && agg_reps
->offset
== ie
->indirect_info
->offset
3016 && agg_reps
->by_ref
== ie
->indirect_info
->by_ref
)
3018 t
= agg_reps
->value
;
3021 agg_reps
= agg_reps
->next
;
3026 const ipa_agg_value_set
*agg
;
3027 if (known_aggs
.length () > (unsigned int) param_index
)
3028 agg
= &known_aggs
[param_index
];
3031 bool from_global_constant
;
3032 t
= ipa_find_agg_cst_for_param (agg
,
3033 (unsigned) param_index
3034 < known_csts
.length ()
3035 ? known_csts
[param_index
]
3037 ie
->indirect_info
->offset
,
3038 ie
->indirect_info
->by_ref
,
3039 &from_global_constant
);
3041 && !from_global_constant
3042 && !ie
->indirect_info
->guaranteed_unmodified
)
3046 else if ((unsigned) param_index
< known_csts
.length ())
3047 t
= known_csts
[param_index
];
3050 && TREE_CODE (t
) == ADDR_EXPR
3051 && TREE_CODE (TREE_OPERAND (t
, 0)) == FUNCTION_DECL
)
3052 return TREE_OPERAND (t
, 0);
3057 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize
))
3060 gcc_assert (!ie
->indirect_info
->agg_contents
);
3061 anc_offset
= ie
->indirect_info
->offset
;
3065 /* Try to work out value of virtual table pointer value in replacements. */
3066 if (!t
&& agg_reps
&& !ie
->indirect_info
->by_ref
)
3070 if (agg_reps
->index
== param_index
3071 && agg_reps
->offset
== ie
->indirect_info
->offset
3072 && agg_reps
->by_ref
)
3074 t
= agg_reps
->value
;
3077 agg_reps
= agg_reps
->next
;
3081 /* Try to work out value of virtual table pointer value in known
3082 aggregate values. */
3083 if (!t
&& known_aggs
.length () > (unsigned int) param_index
3084 && !ie
->indirect_info
->by_ref
)
3086 const ipa_agg_value_set
*agg
= &known_aggs
[param_index
];
3087 t
= ipa_find_agg_cst_for_param (agg
,
3088 (unsigned) param_index
3089 < known_csts
.length ()
3090 ? known_csts
[param_index
] : NULL
,
3091 ie
->indirect_info
->offset
, true);
3094 /* If we found the virtual table pointer, lookup the target. */
3098 unsigned HOST_WIDE_INT offset
;
3099 if (vtable_pointer_value_to_vtable (t
, &vtable
, &offset
))
3102 target
= gimple_get_virt_method_for_vtable (ie
->indirect_info
->otr_token
,
3103 vtable
, offset
, &can_refer
);
3107 || fndecl_built_in_p (target
, BUILT_IN_UNREACHABLE
)
3108 || !possible_polymorphic_call_target_p
3109 (ie
, cgraph_node::get (target
)))
3111 /* Do not speculate builtin_unreachable, it is stupid! */
3112 if (ie
->indirect_info
->vptr_changed
)
3114 target
= ipa_impossible_devirt_target (ie
, target
);
3116 *speculative
= ie
->indirect_info
->vptr_changed
;
3123 /* Do we know the constant value of pointer? */
3124 if (!t
&& (unsigned) param_index
< known_csts
.length ())
3125 t
= known_csts
[param_index
];
3127 gcc_checking_assert (!t
|| TREE_CODE (t
) != TREE_BINFO
);
3129 ipa_polymorphic_call_context context
;
3130 if (known_contexts
.length () > (unsigned int) param_index
)
3132 context
= known_contexts
[param_index
];
3133 context
.offset_by (anc_offset
);
3134 if (ie
->indirect_info
->vptr_changed
)
3135 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3136 ie
->indirect_info
->otr_type
);
3139 ipa_polymorphic_call_context ctx2
= ipa_polymorphic_call_context
3140 (t
, ie
->indirect_info
->otr_type
, anc_offset
);
3141 if (!ctx2
.useless_p ())
3142 context
.combine_with (ctx2
, ie
->indirect_info
->otr_type
);
3147 context
= ipa_polymorphic_call_context (t
, ie
->indirect_info
->otr_type
,
3149 if (ie
->indirect_info
->vptr_changed
)
3150 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3151 ie
->indirect_info
->otr_type
);
3156 vec
<cgraph_node
*>targets
;
3159 targets
= possible_polymorphic_call_targets
3160 (ie
->indirect_info
->otr_type
,
3161 ie
->indirect_info
->otr_token
,
3163 if (!final
|| targets
.length () > 1)
3165 struct cgraph_node
*node
;
3168 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize_speculatively
)
3169 || ie
->speculative
|| !ie
->maybe_hot_p ())
3171 node
= try_speculative_devirtualization (ie
->indirect_info
->otr_type
,
3172 ie
->indirect_info
->otr_token
,
3176 *speculative
= true;
3177 target
= node
->decl
;
3184 *speculative
= false;
3185 if (targets
.length () == 1)
3186 target
= targets
[0]->decl
;
3188 target
= ipa_impossible_devirt_target (ie
, NULL_TREE
);
3191 if (target
&& !possible_polymorphic_call_target_p (ie
,
3192 cgraph_node::get (target
)))
3196 target
= ipa_impossible_devirt_target (ie
, target
);
3202 /* If an indirect edge IE can be turned into a direct one based on data in
3203 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3204 whether the discovered target is only speculative guess. */
3207 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3208 ipa_call_arg_values
*avals
,
3211 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3212 avals
->m_known_contexts
,
3213 avals
->m_known_aggs
,
3217 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3220 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3221 ipa_auto_call_arg_values
*avals
,
3224 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3225 avals
->m_known_contexts
,
3226 avals
->m_known_aggs
,
3230 /* Calculate devirtualization time bonus for NODE, assuming we know information
3231 about arguments stored in AVALS. */
3234 devirtualization_time_bonus (struct cgraph_node
*node
,
3235 ipa_auto_call_arg_values
*avals
)
3237 struct cgraph_edge
*ie
;
3240 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
3242 struct cgraph_node
*callee
;
3243 class ipa_fn_summary
*isummary
;
3244 enum availability avail
;
3248 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3252 /* Only bare minimum benefit for clearly un-inlineable targets. */
3254 callee
= cgraph_node::get (target
);
3255 if (!callee
|| !callee
->definition
)
3257 callee
= callee
->function_symbol (&avail
);
3258 if (avail
< AVAIL_AVAILABLE
)
3260 isummary
= ipa_fn_summaries
->get (callee
);
3261 if (!isummary
|| !isummary
->inlinable
)
3264 int size
= ipa_size_summaries
->get (callee
)->size
;
3265 /* FIXME: The values below need re-considering and perhaps also
3266 integrating into the cost metrics, at lest in some very basic way. */
3267 int max_inline_insns_auto
3268 = opt_for_fn (callee
->decl
, param_max_inline_insns_auto
);
3269 if (size
<= max_inline_insns_auto
/ 4)
3270 res
+= 31 / ((int)speculative
+ 1);
3271 else if (size
<= max_inline_insns_auto
/ 2)
3272 res
+= 15 / ((int)speculative
+ 1);
3273 else if (size
<= max_inline_insns_auto
3274 || DECL_DECLARED_INLINE_P (callee
->decl
))
3275 res
+= 7 / ((int)speculative
+ 1);
3281 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3284 hint_time_bonus (cgraph_node
*node
, const ipa_call_estimates
&estimates
)
3287 ipa_hints hints
= estimates
.hints
;
3288 if (hints
& (INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride
))
3289 result
+= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3291 sreal bonus_for_one
= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3293 if (hints
& INLINE_HINT_loop_iterations
)
3294 result
+= (estimates
.loops_with_known_iterations
* bonus_for_one
).to_int ();
3296 if (hints
& INLINE_HINT_loop_stride
)
3297 result
+= (estimates
.loops_with_known_strides
* bonus_for_one
).to_int ();
3302 /* If there is a reason to penalize the function described by INFO in the
3303 cloning goodness evaluation, do so. */
3306 incorporate_penalties (cgraph_node
*node
, ipa_node_params
*info
,
3309 if (info
->node_within_scc
&& !info
->node_is_self_scc
)
3310 evaluation
= (evaluation
3311 * (100 - opt_for_fn (node
->decl
,
3312 param_ipa_cp_recursion_penalty
))) / 100;
3314 if (info
->node_calling_single_call
)
3315 evaluation
= (evaluation
3316 * (100 - opt_for_fn (node
->decl
,
3317 param_ipa_cp_single_call_penalty
)))
3323 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3324 and SIZE_COST and with the sum of frequencies of incoming edges to the
3325 potential new clone in FREQUENCIES. */
3328 good_cloning_opportunity_p (struct cgraph_node
*node
, sreal time_benefit
,
3329 sreal freq_sum
, profile_count count_sum
,
3332 if (time_benefit
== 0
3333 || !opt_for_fn (node
->decl
, flag_ipa_cp_clone
)
3334 || node
->optimize_for_size_p ())
3337 gcc_assert (size_cost
> 0);
3339 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3340 int eval_threshold
= opt_for_fn (node
->decl
, param_ipa_cp_eval_threshold
);
3341 if (count_sum
.nonzero_p ())
3343 gcc_assert (base_count
.nonzero_p ());
3344 sreal factor
= count_sum
.probability_in (base_count
).to_sreal ();
3345 sreal evaluation
= (time_benefit
* factor
) / size_cost
;
3346 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3351 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3352 "size: %i, count_sum: ", time_benefit
.to_double (),
3354 count_sum
.dump (dump_file
);
3355 fprintf (dump_file
, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3356 info
->node_within_scc
3357 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3358 info
->node_calling_single_call
? ", single_call" : "",
3359 evaluation
.to_double (), eval_threshold
);
3362 return evaluation
.to_int () >= eval_threshold
;
3366 sreal evaluation
= (time_benefit
* freq_sum
) / size_cost
;
3367 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3370 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3371 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3372 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3374 time_benefit
.to_double (), size_cost
, freq_sum
.to_double (),
3375 info
->node_within_scc
3376 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3377 info
->node_calling_single_call
? ", single_call" : "",
3378 evaluation
.to_double (), eval_threshold
);
3380 return evaluation
.to_int () >= eval_threshold
;
3384 /* Return all context independent values from aggregate lattices in PLATS in a
3385 vector. Return NULL if there are none. */
3387 static vec
<ipa_agg_value
>
3388 context_independent_aggregate_values (class ipcp_param_lattices
*plats
)
3390 vec
<ipa_agg_value
> res
= vNULL
;
3392 if (plats
->aggs_bottom
3393 || plats
->aggs_contain_variable
3394 || plats
->aggs_count
== 0)
3397 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
;
3399 aglat
= aglat
->next
)
3400 if (aglat
->is_single_const ())
3402 struct ipa_agg_value item
;
3403 item
.offset
= aglat
->offset
;
3404 item
.value
= aglat
->values
->value
;
3405 res
.safe_push (item
);
3410 /* Grow vectors in AVALS and fill them with information about values of
3411 parameters that are known to be independent of the context. Only calculate
3412 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3413 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3414 parameters will be stored in it.
3416 TODO: Also grow context independent value range vectors. */
3419 gather_context_independent_values (class ipa_node_params
*info
,
3420 ipa_auto_call_arg_values
*avals
,
3421 bool calculate_aggs
,
3422 int *removable_params_cost
)
3424 int i
, count
= ipa_get_param_count (info
);
3427 avals
->m_known_vals
.safe_grow_cleared (count
, true);
3428 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
3430 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
3432 if (removable_params_cost
)
3433 *removable_params_cost
= 0;
3435 for (i
= 0; i
< count
; i
++)
3437 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3438 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3440 if (lat
->is_single_const ())
3442 ipcp_value
<tree
> *val
= lat
->values
;
3443 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3444 avals
->m_known_vals
[i
] = val
->value
;
3445 if (removable_params_cost
)
3446 *removable_params_cost
3447 += estimate_move_cost (TREE_TYPE (val
->value
), false);
3450 else if (removable_params_cost
3451 && !ipa_is_param_used (info
, i
))
3452 *removable_params_cost
3453 += ipa_get_param_move_cost (info
, i
);
3455 if (!ipa_is_param_used (info
, i
))
3458 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3459 /* Do not account known context as reason for cloning. We can see
3460 if it permits devirtualization. */
3461 if (ctxlat
->is_single_const ())
3462 avals
->m_known_contexts
[i
] = ctxlat
->values
->value
;
3466 vec
<ipa_agg_value
> agg_items
;
3467 struct ipa_agg_value_set
*agg
;
3469 agg_items
= context_independent_aggregate_values (plats
);
3470 agg
= &avals
->m_known_aggs
[i
];
3471 agg
->items
= agg_items
;
3472 agg
->by_ref
= plats
->aggs_by_ref
;
3473 ret
|= !agg_items
.is_empty ();
3480 /* Perform time and size measurement of NODE with the context given in AVALS,
3481 calculate the benefit compared to the node without specialization and store
3482 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3483 context-independent or unused removable parameters and EST_MOVE_COST, the
3484 estimated movement of the considered parameter. */
3487 perform_estimation_of_a_value (cgraph_node
*node
,
3488 ipa_auto_call_arg_values
*avals
,
3489 int removable_params_cost
, int est_move_cost
,
3490 ipcp_value_base
*val
)
3493 ipa_call_estimates estimates
;
3495 estimate_ipcp_clone_size_and_time (node
, avals
, &estimates
);
3497 /* Extern inline functions have no cloning local time benefits because they
3498 will be inlined anyway. The only reason to clone them is if it enables
3499 optimization in any of the functions they call. */
3500 if (DECL_EXTERNAL (node
->decl
) && DECL_DECLARED_INLINE_P (node
->decl
))
3503 time_benefit
= (estimates
.nonspecialized_time
- estimates
.time
)
3504 + (devirtualization_time_bonus (node
, avals
)
3505 + hint_time_bonus (node
, estimates
)
3506 + removable_params_cost
+ est_move_cost
);
3508 int size
= estimates
.size
;
3509 gcc_checking_assert (size
>=0);
3510 /* The inliner-heuristics based estimates may think that in certain
3511 contexts some functions do not have any size at all but we want
3512 all specializations to have at least a tiny cost, not least not to
3517 val
->local_time_benefit
= time_benefit
;
3518 val
->local_size_cost
= size
;
3521 /* Get the overall limit oof growth based on parameters extracted from growth.
3522 it does not really make sense to mix functions with different overall growth
3523 limits but it is possible and if it happens, we do not want to select one
3527 get_max_overall_size (cgraph_node
*node
)
3529 long max_new_size
= orig_overall_size
;
3530 long large_unit
= opt_for_fn (node
->decl
, param_ipa_cp_large_unit_insns
);
3531 if (max_new_size
< large_unit
)
3532 max_new_size
= large_unit
;
3533 int unit_growth
= opt_for_fn (node
->decl
, param_ipa_cp_unit_growth
);
3534 max_new_size
+= max_new_size
* unit_growth
/ 100 + 1;
3535 return max_new_size
;
3538 /* Iterate over known values of parameters of NODE and estimate the local
3539 effects in terms of time and size they have. */
3542 estimate_local_effects (struct cgraph_node
*node
)
3544 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3545 int i
, count
= ipa_get_param_count (info
);
3547 int removable_params_cost
;
3549 if (!count
|| !ipcp_versionable_function_p (node
))
3552 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3553 fprintf (dump_file
, "\nEstimating effects for %s.\n", node
->dump_name ());
3555 ipa_auto_call_arg_values avals
;
3556 always_const
= gather_context_independent_values (info
, &avals
, true,
3557 &removable_params_cost
);
3558 int devirt_bonus
= devirtualization_time_bonus (node
, &avals
);
3559 if (always_const
|| devirt_bonus
3560 || (removable_params_cost
&& node
->can_change_signature
))
3562 struct caller_statistics stats
;
3563 ipa_call_estimates estimates
;
3565 init_caller_stats (&stats
);
3566 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
3568 estimate_ipcp_clone_size_and_time (node
, &avals
, &estimates
);
3569 sreal time
= estimates
.nonspecialized_time
- estimates
.time
;
3570 time
+= devirt_bonus
;
3571 time
+= hint_time_bonus (node
, estimates
);
3572 time
+= removable_params_cost
;
3573 int size
= estimates
.size
- stats
.n_calls
* removable_params_cost
;
3576 fprintf (dump_file
, " - context independent values, size: %i, "
3577 "time_benefit: %f\n", size
, (time
).to_double ());
3579 if (size
<= 0 || node
->local
)
3581 info
->do_clone_for_all_contexts
= true;
3584 fprintf (dump_file
, " Decided to specialize for all "
3585 "known contexts, code not going to grow.\n");
3587 else if (good_cloning_opportunity_p (node
, time
, stats
.freq_sum
,
3588 stats
.count_sum
, size
))
3590 if (size
+ overall_size
<= get_max_overall_size (node
))
3592 info
->do_clone_for_all_contexts
= true;
3593 overall_size
+= size
;
3596 fprintf (dump_file
, " Decided to specialize for all "
3597 "known contexts, growth (to %li) deemed "
3598 "beneficial.\n", overall_size
);
3600 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3601 fprintf (dump_file
, " Not cloning for all contexts because "
3602 "maximum unit size would be reached with %li.\n",
3603 size
+ overall_size
);
3605 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3606 fprintf (dump_file
, " Not cloning for all contexts because "
3607 "!good_cloning_opportunity_p.\n");
3611 for (i
= 0; i
< count
; i
++)
3613 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3614 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3615 ipcp_value
<tree
> *val
;
3619 || avals
.m_known_vals
[i
])
3622 for (val
= lat
->values
; val
; val
= val
->next
)
3624 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3625 avals
.m_known_vals
[i
] = val
->value
;
3627 int emc
= estimate_move_cost (TREE_TYPE (val
->value
), true);
3628 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3631 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3633 fprintf (dump_file
, " - estimates for value ");
3634 print_ipcp_constant_value (dump_file
, val
->value
);
3635 fprintf (dump_file
, " for ");
3636 ipa_dump_param (dump_file
, info
, i
);
3637 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3638 val
->local_time_benefit
.to_double (),
3639 val
->local_size_cost
);
3642 avals
.m_known_vals
[i
] = NULL_TREE
;
3645 for (i
= 0; i
< count
; i
++)
3647 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3649 if (!plats
->virt_call
)
3652 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3653 ipcp_value
<ipa_polymorphic_call_context
> *val
;
3657 || !avals
.m_known_contexts
[i
].useless_p ())
3660 for (val
= ctxlat
->values
; val
; val
= val
->next
)
3662 avals
.m_known_contexts
[i
] = val
->value
;
3663 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3668 fprintf (dump_file
, " - estimates for polymorphic context ");
3669 print_ipcp_constant_value (dump_file
, val
->value
);
3670 fprintf (dump_file
, " for ");
3671 ipa_dump_param (dump_file
, info
, i
);
3672 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3673 val
->local_time_benefit
.to_double (),
3674 val
->local_size_cost
);
3677 avals
.m_known_contexts
[i
] = ipa_polymorphic_call_context ();
3680 for (i
= 0; i
< count
; i
++)
3682 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3684 if (plats
->aggs_bottom
|| !plats
->aggs
)
3687 ipa_agg_value_set
*agg
= &avals
.m_known_aggs
[i
];
3688 for (ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3690 ipcp_value
<tree
> *val
;
3691 if (aglat
->bottom
|| !aglat
->values
3692 /* If the following is true, the one value is in known_aggs. */
3693 || (!plats
->aggs_contain_variable
3694 && aglat
->is_single_const ()))
3697 for (val
= aglat
->values
; val
; val
= val
->next
)
3699 struct ipa_agg_value item
;
3701 item
.offset
= aglat
->offset
;
3702 item
.value
= val
->value
;
3703 agg
->items
.safe_push (item
);
3705 perform_estimation_of_a_value (node
, &avals
,
3706 removable_params_cost
, 0, val
);
3708 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3710 fprintf (dump_file
, " - estimates for value ");
3711 print_ipcp_constant_value (dump_file
, val
->value
);
3712 fprintf (dump_file
, " for ");
3713 ipa_dump_param (dump_file
, info
, i
);
3714 fprintf (dump_file
, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3715 "]: time_benefit: %g, size: %i\n",
3716 plats
->aggs_by_ref
? "ref " : "",
3718 val
->local_time_benefit
.to_double (),
3719 val
->local_size_cost
);
3729 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3730 topological sort of values. */
3732 template <typename valtype
>
3734 value_topo_info
<valtype
>::add_val (ipcp_value
<valtype
> *cur_val
)
3736 ipcp_value_source
<valtype
> *src
;
3742 cur_val
->dfs
= dfs_counter
;
3743 cur_val
->low_link
= dfs_counter
;
3745 cur_val
->topo_next
= stack
;
3747 cur_val
->on_stack
= true;
3749 for (src
= cur_val
->sources
; src
; src
= src
->next
)
3752 if (src
->val
->dfs
== 0)
3755 if (src
->val
->low_link
< cur_val
->low_link
)
3756 cur_val
->low_link
= src
->val
->low_link
;
3758 else if (src
->val
->on_stack
3759 && src
->val
->dfs
< cur_val
->low_link
)
3760 cur_val
->low_link
= src
->val
->dfs
;
3763 if (cur_val
->dfs
== cur_val
->low_link
)
3765 ipcp_value
<valtype
> *v
, *scc_list
= NULL
;
3770 stack
= v
->topo_next
;
3771 v
->on_stack
= false;
3772 v
->scc_no
= cur_val
->dfs
;
3774 v
->scc_next
= scc_list
;
3777 while (v
!= cur_val
);
3779 cur_val
->topo_next
= values_topo
;
3780 values_topo
= cur_val
;
3784 /* Add all values in lattices associated with NODE to the topological sort if
3785 they are not there yet. */
3788 add_all_node_vals_to_toposort (cgraph_node
*node
, ipa_topo_info
*topo
)
3790 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3791 int i
, count
= ipa_get_param_count (info
);
3793 for (i
= 0; i
< count
; i
++)
3795 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3796 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3797 struct ipcp_agg_lattice
*aglat
;
3801 ipcp_value
<tree
> *val
;
3802 for (val
= lat
->values
; val
; val
= val
->next
)
3803 topo
->constants
.add_val (val
);
3806 if (!plats
->aggs_bottom
)
3807 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3810 ipcp_value
<tree
> *val
;
3811 for (val
= aglat
->values
; val
; val
= val
->next
)
3812 topo
->constants
.add_val (val
);
3815 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3816 if (!ctxlat
->bottom
)
3818 ipcp_value
<ipa_polymorphic_call_context
> *ctxval
;
3819 for (ctxval
= ctxlat
->values
; ctxval
; ctxval
= ctxval
->next
)
3820 topo
->contexts
.add_val (ctxval
);
3825 /* One pass of constants propagation along the call graph edges, from callers
3826 to callees (requires topological ordering in TOPO), iterate over strongly
3827 connected components. */
3830 propagate_constants_topo (class ipa_topo_info
*topo
)
3834 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
3837 struct cgraph_node
*v
, *node
= topo
->order
[i
];
3838 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (node
);
3840 /* First, iteratively propagate within the strongly connected component
3841 until all lattices stabilize. */
3842 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3843 if (v
->has_gimple_body_p ())
3845 if (opt_for_fn (v
->decl
, flag_ipa_cp
)
3846 && opt_for_fn (v
->decl
, optimize
))
3847 push_node_to_stack (topo
, v
);
3848 /* When V is not optimized, we can not push it to stack, but
3849 still we need to set all its callees lattices to bottom. */
3852 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3853 propagate_constants_across_call (cs
);
3857 v
= pop_node_from_stack (topo
);
3860 struct cgraph_edge
*cs
;
3861 class ipa_node_params
*info
= NULL
;
3862 bool self_scc
= true;
3864 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3865 if (ipa_edge_within_scc (cs
))
3867 cgraph_node
*callee
= cs
->callee
->function_symbol ();
3874 info
= ipa_node_params_sum
->get (v
);
3875 info
->node_within_scc
= true;
3878 if (propagate_constants_across_call (cs
))
3879 push_node_to_stack (topo
, callee
);
3883 info
->node_is_self_scc
= self_scc
;
3885 v
= pop_node_from_stack (topo
);
3888 /* Afterwards, propagate along edges leading out of the SCC, calculates
3889 the local effects of the discovered constants and all valid values to
3890 their topological sort. */
3891 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3892 if (v
->has_gimple_body_p ()
3893 && opt_for_fn (v
->decl
, flag_ipa_cp
)
3894 && opt_for_fn (v
->decl
, optimize
))
3896 struct cgraph_edge
*cs
;
3898 estimate_local_effects (v
);
3899 add_all_node_vals_to_toposort (v
, topo
);
3900 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3901 if (!ipa_edge_within_scc (cs
))
3902 propagate_constants_across_call (cs
);
3904 cycle_nodes
.release ();
3908 /* Propagate the estimated effects of individual values along the topological
3909 from the dependent values to those they depend on. */
3911 template <typename valtype
>
3913 value_topo_info
<valtype
>::propagate_effects ()
3915 ipcp_value
<valtype
> *base
;
3916 hash_set
<ipcp_value
<valtype
> *> processed_srcvals
;
3918 for (base
= values_topo
; base
; base
= base
->topo_next
)
3920 ipcp_value_source
<valtype
> *src
;
3921 ipcp_value
<valtype
> *val
;
3923 HOST_WIDE_INT size
= 0;
3925 for (val
= base
; val
; val
= val
->scc_next
)
3927 time
= time
+ val
->local_time_benefit
+ val
->prop_time_benefit
;
3928 size
= size
+ val
->local_size_cost
+ val
->prop_size_cost
;
3931 for (val
= base
; val
; val
= val
->scc_next
)
3933 processed_srcvals
.empty ();
3934 for (src
= val
->sources
; src
; src
= src
->next
)
3936 && src
->cs
->maybe_hot_p ())
3938 if (!processed_srcvals
.add (src
->val
))
3940 HOST_WIDE_INT prop_size
= size
+ src
->val
->prop_size_cost
;
3941 if (prop_size
< INT_MAX
)
3942 src
->val
->prop_size_cost
= prop_size
;
3947 int special_factor
= 1;
3948 if (val
->same_scc (src
->val
))
3950 = opt_for_fn(src
->cs
->caller
->decl
,
3951 param_ipa_cp_recursive_freq_factor
);
3952 else if (val
->self_recursion_generated_p ()
3953 && (src
->cs
->callee
->function_symbol ()
3954 == src
->cs
->caller
))
3956 int max_recur_gen_depth
3957 = opt_for_fn(src
->cs
->caller
->decl
,
3958 param_ipa_cp_max_recursive_depth
);
3959 special_factor
= max_recur_gen_depth
3960 - val
->self_recursion_generated_level
+ 1;
3963 src
->val
->prop_time_benefit
3964 += time
* special_factor
* src
->cs
->sreal_frequency ();
3969 val
->prop_time_benefit
= time
;
3970 val
->prop_size_cost
= size
;
3974 val
->prop_time_benefit
= 0;
3975 val
->prop_size_cost
= 0;
3981 /* Callback for qsort to sort counts of all edges. */
3984 compare_edge_profile_counts (const void *a
, const void *b
)
3986 const profile_count
*cnt1
= (const profile_count
*) a
;
3987 const profile_count
*cnt2
= (const profile_count
*) b
;
3997 /* Propagate constants, polymorphic contexts and their effects from the
3998 summaries interprocedurally. */
4001 ipcp_propagate_stage (class ipa_topo_info
*topo
)
4003 struct cgraph_node
*node
;
4006 fprintf (dump_file
, "\n Propagating constants:\n\n");
4008 base_count
= profile_count::uninitialized ();
4010 bool compute_count_base
= false;
4011 unsigned base_count_pos_percent
= 0;
4012 FOR_EACH_DEFINED_FUNCTION (node
)
4014 if (node
->has_gimple_body_p ()
4015 && opt_for_fn (node
->decl
, flag_ipa_cp
)
4016 && opt_for_fn (node
->decl
, optimize
))
4018 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4019 determine_versionability (node
, info
);
4021 unsigned nlattices
= ipa_get_param_count (info
);
4022 void *chunk
= XCNEWVEC (class ipcp_param_lattices
, nlattices
);
4023 info
->lattices
= new (chunk
) ipcp_param_lattices
[nlattices
];
4024 initialize_node_lattices (node
);
4026 ipa_size_summary
*s
= ipa_size_summaries
->get (node
);
4027 if (node
->definition
&& !node
->alias
&& s
!= NULL
)
4028 overall_size
+= s
->self_size
;
4029 if (node
->count
.ipa ().initialized_p ())
4031 compute_count_base
= true;
4032 unsigned pos_percent
= opt_for_fn (node
->decl
,
4033 param_ipa_cp_profile_count_base
);
4034 base_count_pos_percent
= MAX (base_count_pos_percent
, pos_percent
);
4038 if (compute_count_base
)
4040 auto_vec
<profile_count
> all_edge_counts
;
4041 all_edge_counts
.reserve_exact (symtab
->edges_count
);
4042 FOR_EACH_DEFINED_FUNCTION (node
)
4043 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4045 profile_count count
= cs
->count
.ipa ();
4046 if (!(count
> profile_count::zero ()))
4049 enum availability avail
;
4051 = cs
->callee
->function_or_virtual_thunk_symbol (&avail
);
4052 ipa_node_params
*info
= ipa_node_params_sum
->get (tgt
);
4053 if (info
&& info
->versionable
)
4054 all_edge_counts
.quick_push (count
);
4057 if (!all_edge_counts
.is_empty ())
4059 gcc_assert (base_count_pos_percent
<= 100);
4060 all_edge_counts
.qsort (compare_edge_profile_counts
);
4062 unsigned base_count_pos
4063 = ((all_edge_counts
.length () * (base_count_pos_percent
)) / 100);
4064 base_count
= all_edge_counts
[base_count_pos
];
4068 fprintf (dump_file
, "\nSelected base_count from %u edges at "
4069 "position %u, arriving at: ", all_edge_counts
.length (),
4071 base_count
.dump (dump_file
);
4072 fprintf (dump_file
, "\n");
4076 fprintf (dump_file
, "\nNo candidates with non-zero call count found, "
4077 "continuing as if without profile feedback.\n");
4080 orig_overall_size
= overall_size
;
4083 fprintf (dump_file
, "\noverall_size: %li\n", overall_size
);
4085 propagate_constants_topo (topo
);
4087 ipcp_verify_propagated_values ();
4088 topo
->constants
.propagate_effects ();
4089 topo
->contexts
.propagate_effects ();
4093 fprintf (dump_file
, "\nIPA lattices after all propagation:\n");
4094 print_all_lattices (dump_file
, (dump_flags
& TDF_DETAILS
), true);
4098 /* Discover newly direct outgoing edges from NODE which is a new clone with
4099 known KNOWN_CSTS and make them direct. */
4102 ipcp_discover_new_direct_edges (struct cgraph_node
*node
,
4103 vec
<tree
> known_csts
,
4104 vec
<ipa_polymorphic_call_context
>
4106 struct ipa_agg_replacement_value
*aggvals
)
4108 struct cgraph_edge
*ie
, *next_ie
;
4111 for (ie
= node
->indirect_calls
; ie
; ie
= next_ie
)
4116 next_ie
= ie
->next_callee
;
4117 target
= ipa_get_indirect_edge_target_1 (ie
, known_csts
, known_contexts
,
4118 vNULL
, aggvals
, &speculative
);
4121 bool agg_contents
= ie
->indirect_info
->agg_contents
;
4122 bool polymorphic
= ie
->indirect_info
->polymorphic
;
4123 int param_index
= ie
->indirect_info
->param_index
;
4124 struct cgraph_edge
*cs
= ipa_make_edge_direct_to_target (ie
, target
,
4128 if (cs
&& !agg_contents
&& !polymorphic
)
4130 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4131 int c
= ipa_get_controlled_uses (info
, param_index
);
4132 if (c
!= IPA_UNDESCRIBED_USE
4133 && !ipa_get_param_load_dereferenced (info
, param_index
))
4135 struct ipa_ref
*to_del
;
4138 ipa_set_controlled_uses (info
, param_index
, c
);
4139 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4140 fprintf (dump_file
, " controlled uses count of param "
4141 "%i bumped down to %i\n", param_index
, c
);
4143 && (to_del
= node
->find_reference (cs
->callee
, NULL
, 0)))
4145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4146 fprintf (dump_file
, " and even removing its "
4147 "cloning-created reference\n");
4148 to_del
->remove_reference ();
4154 /* Turning calls to direct calls will improve overall summary. */
4156 ipa_update_overall_fn_summary (node
);
4159 class edge_clone_summary
;
4160 static call_summary
<edge_clone_summary
*> *edge_clone_summaries
= NULL
;
4162 /* Edge clone summary. */
4164 class edge_clone_summary
4167 /* Default constructor. */
4168 edge_clone_summary (): prev_clone (NULL
), next_clone (NULL
) {}
4170 /* Default destructor. */
4171 ~edge_clone_summary ()
4174 edge_clone_summaries
->get (prev_clone
)->next_clone
= next_clone
;
4176 edge_clone_summaries
->get (next_clone
)->prev_clone
= prev_clone
;
4179 cgraph_edge
*prev_clone
;
4180 cgraph_edge
*next_clone
;
4183 class edge_clone_summary_t
:
4184 public call_summary
<edge_clone_summary
*>
4187 edge_clone_summary_t (symbol_table
*symtab
):
4188 call_summary
<edge_clone_summary
*> (symtab
)
4190 m_initialize_when_cloning
= true;
4193 void duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4194 edge_clone_summary
*src_data
,
4195 edge_clone_summary
*dst_data
) final override
;
4198 /* Edge duplication hook. */
4201 edge_clone_summary_t::duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4202 edge_clone_summary
*src_data
,
4203 edge_clone_summary
*dst_data
)
4205 if (src_data
->next_clone
)
4206 edge_clone_summaries
->get (src_data
->next_clone
)->prev_clone
= dst_edge
;
4207 dst_data
->prev_clone
= src_edge
;
4208 dst_data
->next_clone
= src_data
->next_clone
;
4209 src_data
->next_clone
= dst_edge
;
4212 /* Return true is CS calls DEST or its clone for all contexts. When
4213 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4214 edges from/to an all-context clone. */
4217 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge
*cs
, cgraph_node
*dest
,
4218 bool allow_recursion_to_clone
)
4220 enum availability availability
;
4221 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
4223 if (availability
<= AVAIL_INTERPOSABLE
)
4227 if (!allow_recursion_to_clone
&& cs
->caller
== callee
)
4230 ipa_node_params
*info
= ipa_node_params_sum
->get (callee
);
4231 return info
->is_all_contexts_clone
&& info
->ipcp_orig_node
== dest
;
4234 /* Return true if edge CS does bring about the value described by SRC to
4235 DEST_VAL of node DEST or its clone for all contexts. */
4238 cgraph_edge_brings_value_p (cgraph_edge
*cs
, ipcp_value_source
<tree
> *src
,
4239 cgraph_node
*dest
, ipcp_value
<tree
> *dest_val
)
4241 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4243 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, !src
->val
)
4244 || caller_info
->node_dead
)
4250 if (caller_info
->ipcp_orig_node
)
4253 if (src
->offset
== -1)
4254 t
= caller_info
->known_csts
[src
->index
];
4256 t
= get_clone_agg_value (cs
->caller
, src
->offset
, src
->index
);
4257 return (t
!= NULL_TREE
4258 && values_equal_for_ipcp_p (src
->val
->value
, t
));
4262 if (src
->val
== dest_val
)
4265 struct ipcp_agg_lattice
*aglat
;
4266 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4268 if (src
->offset
== -1)
4269 return (plats
->itself
.is_single_const ()
4270 && values_equal_for_ipcp_p (src
->val
->value
,
4271 plats
->itself
.values
->value
));
4274 if (plats
->aggs_bottom
|| plats
->aggs_contain_variable
)
4276 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4277 if (aglat
->offset
== src
->offset
)
4278 return (aglat
->is_single_const ()
4279 && values_equal_for_ipcp_p (src
->val
->value
,
4280 aglat
->values
->value
));
4286 /* Return true if edge CS does bring about the value described by SRC to
4287 DST_VAL of node DEST or its clone for all contexts. */
4290 cgraph_edge_brings_value_p (cgraph_edge
*cs
,
4291 ipcp_value_source
<ipa_polymorphic_call_context
> *src
,
4293 ipcp_value
<ipa_polymorphic_call_context
> *)
4295 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4297 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, true)
4298 || caller_info
->node_dead
)
4303 if (caller_info
->ipcp_orig_node
)
4304 return (caller_info
->known_contexts
.length () > (unsigned) src
->index
)
4305 && values_equal_for_ipcp_p (src
->val
->value
,
4306 caller_info
->known_contexts
[src
->index
]);
4308 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4310 return plats
->ctxlat
.is_single_const ()
4311 && values_equal_for_ipcp_p (src
->val
->value
,
4312 plats
->ctxlat
.values
->value
);
4315 /* Get the next clone in the linked list of clones of an edge. */
4317 static inline struct cgraph_edge
*
4318 get_next_cgraph_edge_clone (struct cgraph_edge
*cs
)
4320 edge_clone_summary
*s
= edge_clone_summaries
->get (cs
);
4321 return s
!= NULL
? s
->next_clone
: NULL
;
4324 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4325 of them is viable and hot, return true. In that case, for those that still
4326 hold, add their edge frequency and their number and cumulative profile
4327 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4328 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4330 template <typename valtype
>
4332 get_info_about_necessary_edges (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4333 sreal
*freq_sum
, int *caller_count
,
4334 profile_count
*rec_count_sum
,
4335 profile_count
*nonrec_count_sum
)
4337 ipcp_value_source
<valtype
> *src
;
4340 profile_count rec_cnt
= profile_count::zero ();
4341 profile_count nonrec_cnt
= profile_count::zero ();
4343 bool non_self_recursive
= false;
4345 for (src
= val
->sources
; src
; src
= src
->next
)
4347 struct cgraph_edge
*cs
= src
->cs
;
4350 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4353 freq
+= cs
->sreal_frequency ();
4354 hot
|= cs
->maybe_hot_p ();
4355 if (cs
->caller
!= dest
)
4357 non_self_recursive
= true;
4358 if (cs
->count
.ipa ().initialized_p ())
4359 rec_cnt
+= cs
->count
.ipa ();
4361 else if (cs
->count
.ipa ().initialized_p ())
4362 nonrec_cnt
+= cs
->count
.ipa ();
4364 cs
= get_next_cgraph_edge_clone (cs
);
4368 /* If the only edges bringing a value are self-recursive ones, do not bother
4370 if (!non_self_recursive
)
4374 *caller_count
= count
;
4375 *rec_count_sum
= rec_cnt
;
4376 *nonrec_count_sum
= nonrec_cnt
;
4378 if (!hot
&& ipa_node_params_sum
->get (dest
)->node_within_scc
)
4380 struct cgraph_edge
*cs
;
4382 /* Cold non-SCC source edge could trigger hot recursive execution of
4383 function. Consider the case as hot and rely on following cost model
4384 computation to further select right one. */
4385 for (cs
= dest
->callers
; cs
; cs
= cs
->next_caller
)
4386 if (cs
->caller
== dest
&& cs
->maybe_hot_p ())
4393 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4394 to let a non-self-recursive caller be the first element. Thus, we can
4395 simplify intersecting operations on values that arrive from all of these
4396 callers, especially when there exists self-recursive call. Return true if
4397 this kind of adjustment is possible. */
4400 adjust_callers_for_value_intersection (vec
<cgraph_edge
*> &callers
,
4403 for (unsigned i
= 0; i
< callers
.length (); i
++)
4405 cgraph_edge
*cs
= callers
[i
];
4407 if (cs
->caller
!= node
)
4411 callers
[i
] = callers
[0];
4420 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4421 is assumed their number is known and equal to CALLER_COUNT. */
4423 template <typename valtype
>
4424 static vec
<cgraph_edge
*>
4425 gather_edges_for_value (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4428 ipcp_value_source
<valtype
> *src
;
4429 vec
<cgraph_edge
*> ret
;
4431 ret
.create (caller_count
);
4432 for (src
= val
->sources
; src
; src
= src
->next
)
4434 struct cgraph_edge
*cs
= src
->cs
;
4437 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4438 ret
.quick_push (cs
);
4439 cs
= get_next_cgraph_edge_clone (cs
);
4443 if (caller_count
> 1)
4444 adjust_callers_for_value_intersection (ret
, dest
);
4449 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4450 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4451 should be set to true when the reference created for the constant should be
4452 a load one and not an address one because the corresponding parameter p is
4455 static struct ipa_replace_map
*
4456 get_replacement_map (class ipa_node_params
*info
, tree value
, int parm_num
,
4457 bool force_load_ref
)
4459 struct ipa_replace_map
*replace_map
;
4461 replace_map
= ggc_alloc
<ipa_replace_map
> ();
4464 fprintf (dump_file
, " replacing ");
4465 ipa_dump_param (dump_file
, info
, parm_num
);
4467 fprintf (dump_file
, " with const ");
4468 print_generic_expr (dump_file
, value
);
4471 fprintf (dump_file
, " - forcing load reference\n");
4473 fprintf (dump_file
, "\n");
4475 replace_map
->parm_num
= parm_num
;
4476 replace_map
->new_tree
= value
;
4477 replace_map
->force_load_ref
= force_load_ref
;
4481 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4482 one, otherwise it will be referred to as the original node. */
4485 dump_profile_updates (cgraph_node
*node
, bool spec
)
4488 fprintf (dump_file
, " setting count of the specialized node %s to ",
4489 node
->dump_name ());
4491 fprintf (dump_file
, " setting count of the original node %s to ",
4492 node
->dump_name ());
4494 node
->count
.dump (dump_file
);
4495 fprintf (dump_file
, "\n");
4496 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4498 fprintf (dump_file
, " edge to %s has count ",
4499 cs
->callee
->dump_name ());
4500 cs
->count
.dump (dump_file
);
4501 fprintf (dump_file
, "\n");
4505 /* With partial train run we do not want to assume that original's count is
4506 zero whenever we redurect all executed edges to clone. Simply drop profile
4507 to local one in this case. In eany case, return the new value. ORIG_NODE
4508 is the original node and its count has not been updaed yet. */
4511 lenient_count_portion_handling (profile_count remainder
, cgraph_node
*orig_node
)
4513 if (remainder
.ipa_p () && !remainder
.ipa ().nonzero_p ()
4514 && orig_node
->count
.ipa_p () && orig_node
->count
.ipa ().nonzero_p ()
4515 && opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4516 remainder
= remainder
.guessed_local ();
4521 /* Structure to sum counts coming from nodes other than the original node and
4524 struct gather_other_count_struct
4527 profile_count other_count
;
4530 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4531 counts that come from non-self-recursive calls.. */
4534 gather_count_of_non_rec_edges (cgraph_node
*node
, void *data
)
4536 gather_other_count_struct
*desc
= (gather_other_count_struct
*) data
;
4537 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4538 if (cs
->caller
!= desc
->orig
&& cs
->caller
->clone_of
!= desc
->orig
)
4539 desc
->other_count
+= cs
->count
.ipa ();
4543 /* Structure to help analyze if we need to boost counts of some clones of some
4544 non-recursive edges to match the new callee count. */
4546 struct desc_incoming_count_struct
4549 hash_set
<cgraph_edge
*> *processed_edges
;
4550 profile_count count
;
4551 unsigned unproc_orig_rec_edges
;
4554 /* Go over edges calling NODE and its thunks and gather information about
4555 incoming counts so that we know if we need to make any adjustments. */
4558 analyze_clone_icoming_counts (cgraph_node
*node
,
4559 desc_incoming_count_struct
*desc
)
4561 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4562 if (cs
->caller
->thunk
)
4564 analyze_clone_icoming_counts (cs
->caller
, desc
);
4569 if (cs
->count
.initialized_p ())
4570 desc
->count
+= cs
->count
.ipa ();
4571 if (!desc
->processed_edges
->contains (cs
)
4572 && cs
->caller
->clone_of
== desc
->orig
)
4573 desc
->unproc_orig_rec_edges
++;
4577 /* If caller edge counts of a clone created for a self-recursive arithmetic
4578 jump function must be adjusted because it is coming from a the "seed" clone
4579 for the first value and so has been excessively scaled back as if it was not
4580 a recursive call, adjust it so that the incoming counts of NODE match its
4581 count. NODE is the node or its thunk. */
4584 adjust_clone_incoming_counts (cgraph_node
*node
,
4585 desc_incoming_count_struct
*desc
)
4587 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4588 if (cs
->caller
->thunk
)
4590 adjust_clone_incoming_counts (cs
->caller
, desc
);
4591 profile_count sum
= profile_count::zero ();
4592 for (cgraph_edge
*e
= cs
->caller
->callers
; e
; e
= e
->next_caller
)
4593 if (e
->count
.initialized_p ())
4594 sum
+= e
->count
.ipa ();
4595 cs
->count
= cs
->count
.combine_with_ipa_count (sum
);
4597 else if (!desc
->processed_edges
->contains (cs
)
4598 && cs
->caller
->clone_of
== desc
->orig
)
4600 cs
->count
+= desc
->count
;
4603 fprintf (dump_file
, " Adjusted count of an incoming edge of "
4604 "a clone %s -> %s to ", cs
->caller
->dump_name (),
4605 cs
->callee
->dump_name ());
4606 cs
->count
.dump (dump_file
);
4607 fprintf (dump_file
, "\n");
4612 /* When ORIG_NODE has been cloned for values which have been generated fora
4613 self-recursive call as a result of an arithmetic pass-through
4614 jump-functions, adjust its count together with counts of all such clones in
4615 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4617 The function sums the counts of the original node and all its clones that
4618 cannot be attributed to a specific clone because it comes from a
4619 non-recursive edge. This sum is then evenly divided between the clones and
4620 on top of that each one gets all the counts which can be attributed directly
4624 update_counts_for_self_gen_clones (cgraph_node
*orig_node
,
4625 const vec
<cgraph_node
*> &self_gen_clones
)
4627 profile_count redist_sum
= orig_node
->count
.ipa ();
4628 if (!(redist_sum
> profile_count::zero ()))
4632 fprintf (dump_file
, " Updating profile of self recursive clone "
4635 gather_other_count_struct gocs
;
4636 gocs
.orig
= orig_node
;
4637 gocs
.other_count
= profile_count::zero ();
4639 auto_vec
<profile_count
, 8> other_edges_count
;
4640 for (cgraph_node
*n
: self_gen_clones
)
4642 gocs
.other_count
= profile_count::zero ();
4643 n
->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges
,
4645 other_edges_count
.safe_push (gocs
.other_count
);
4646 redist_sum
-= gocs
.other_count
;
4649 hash_set
<cgraph_edge
*> processed_edges
;
4651 for (cgraph_node
*n
: self_gen_clones
)
4653 profile_count orig_count
= n
->count
;
4654 profile_count new_count
4655 = (redist_sum
/ self_gen_clones
.length () + other_edges_count
[i
]);
4656 new_count
= lenient_count_portion_handling (new_count
, orig_node
);
4657 n
->count
= new_count
;
4658 profile_count::adjust_for_ipa_scaling (&new_count
, &orig_count
);
4659 for (cgraph_edge
*cs
= n
->callees
; cs
; cs
= cs
->next_callee
)
4661 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4662 processed_edges
.add (cs
);
4664 for (cgraph_edge
*cs
= n
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4665 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4670 /* There are still going to be edges to ORIG_NODE that have one or more
4671 clones coming from another node clone in SELF_GEN_CLONES and which we
4672 scaled by the same amount, which means that the total incoming sum of
4673 counts to ORIG_NODE will be too high, scale such edges back. */
4674 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4676 if (cs
->callee
->ultimate_alias_target () == orig_node
)
4679 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4680 if (e
->callee
->ultimate_alias_target () == orig_node
4681 && processed_edges
.contains (e
))
4684 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4685 if (e
->callee
->ultimate_alias_target () == orig_node
4686 && processed_edges
.contains (e
))
4691 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4692 along self-recursive edges are likely to have fairly low count and so
4693 edges from them to nodes in the self_gen_clones do not correspond to the
4694 artificially distributed count of the nodes, the total sum of incoming
4695 edges to some clones might be too low. Detect this situation and correct
4697 for (cgraph_node
*n
: self_gen_clones
)
4699 if (!(n
->count
.ipa () > profile_count::zero ()))
4702 desc_incoming_count_struct desc
;
4703 desc
.orig
= orig_node
;
4704 desc
.processed_edges
= &processed_edges
;
4705 desc
.count
= profile_count::zero ();
4706 desc
.unproc_orig_rec_edges
= 0;
4707 analyze_clone_icoming_counts (n
, &desc
);
4709 if (n
->count
.differs_from_p (desc
.count
))
4711 if (n
->count
> desc
.count
4712 && desc
.unproc_orig_rec_edges
> 0)
4714 desc
.count
= n
->count
- desc
.count
;
4715 desc
.count
= desc
.count
/= desc
.unproc_orig_rec_edges
;
4716 adjust_clone_incoming_counts (n
, &desc
);
4720 " Unable to fix up incoming counts for %s.\n",
4726 for (cgraph_node
*n
: self_gen_clones
)
4727 dump_profile_updates (n
, n
!= orig_node
);
4731 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4732 their profile information to reflect this. This function should not be used
4733 for clones generated for arithmetic pass-through jump functions on a
4734 self-recursive call graph edge, that situation is handled by
4735 update_counts_for_self_gen_clones. */
4738 update_profiling_info (struct cgraph_node
*orig_node
,
4739 struct cgraph_node
*new_node
)
4741 struct caller_statistics stats
;
4742 profile_count new_sum
;
4743 profile_count remainder
, orig_node_count
= orig_node
->count
.ipa ();
4745 if (!(orig_node_count
> profile_count::zero ()))
4750 fprintf (dump_file
, " Updating profile from original count: ");
4751 orig_node_count
.dump (dump_file
);
4752 fprintf (dump_file
, "\n");
4755 init_caller_stats (&stats
, new_node
);
4756 new_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
4758 new_sum
= stats
.count_sum
;
4760 if (new_sum
> orig_node_count
)
4762 /* TODO: Perhaps this should be gcc_unreachable ()? */
4763 remainder
= profile_count::zero ().guessed_local ();
4765 else if (stats
.rec_count_sum
.nonzero_p ())
4767 int new_nonrec_calls
= stats
.n_nonrec_calls
;
4768 /* There are self-recursive edges which are likely to bring in the
4769 majority of calls but which we must divide in between the original and
4771 init_caller_stats (&stats
, orig_node
);
4772 orig_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
,
4774 int orig_nonrec_calls
= stats
.n_nonrec_calls
;
4775 profile_count orig_nonrec_call_count
= stats
.count_sum
;
4777 if (orig_node
->local
)
4779 if (!orig_nonrec_call_count
.nonzero_p ())
4782 fprintf (dump_file
, " The original is local and the only "
4783 "incoming edges from non-dead callers with nonzero "
4784 "counts are self-recursive, assuming it is cold.\n");
4785 /* The NEW_NODE count and counts of all its outgoing edges
4786 are still unmodified copies of ORIG_NODE's. Just clear
4787 the latter and bail out. */
4789 if (opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4790 zero
= profile_count::zero ().guessed_local ();
4792 zero
= profile_count::adjusted_zero ();
4793 orig_node
->count
= zero
;
4794 for (cgraph_edge
*cs
= orig_node
->callees
;
4796 cs
= cs
->next_callee
)
4798 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4800 cs
= cs
->next_callee
)
4807 /* Let's behave as if there was another caller that accounts for all
4808 the calls that were either indirect or from other compilation
4810 orig_nonrec_calls
++;
4811 profile_count pretend_caller_count
4812 = (orig_node_count
- new_sum
- orig_nonrec_call_count
4813 - stats
.rec_count_sum
);
4814 orig_nonrec_call_count
+= pretend_caller_count
;
4817 /* Divide all "unexplained" counts roughly proportionally to sums of
4818 counts of non-recursive calls.
4820 We put rather arbitrary limits on how many counts we claim because the
4821 number of non-self-recursive incoming count is only a rough guideline
4822 and there are cases (such as mcf) where using it blindly just takes
4823 too many. And if lattices are considered in the opposite order we
4824 could also take too few. */
4825 profile_count unexp
= orig_node_count
- new_sum
- orig_nonrec_call_count
;
4827 int limit_den
= 2 * (orig_nonrec_calls
+ new_nonrec_calls
);
4828 profile_count new_part
4829 = MAX(MIN (unexp
.apply_scale (new_sum
,
4830 new_sum
+ orig_nonrec_call_count
),
4831 unexp
.apply_scale (limit_den
- 1, limit_den
)),
4832 unexp
.apply_scale (new_nonrec_calls
, limit_den
));
4835 fprintf (dump_file
, " Claiming ");
4836 new_part
.dump (dump_file
);
4837 fprintf (dump_file
, " of unexplained ");
4838 unexp
.dump (dump_file
);
4839 fprintf (dump_file
, " counts because of self-recursive "
4842 new_sum
+= new_part
;
4843 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4847 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4850 new_sum
= orig_node_count
.combine_with_ipa_count (new_sum
);
4851 new_node
->count
= new_sum
;
4852 orig_node
->count
= remainder
;
4854 profile_count orig_new_node_count
= orig_node_count
;
4855 profile_count::adjust_for_ipa_scaling (&new_sum
, &orig_new_node_count
);
4856 for (cgraph_edge
*cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4857 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4858 for (cgraph_edge
*cs
= new_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4859 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4861 profile_count::adjust_for_ipa_scaling (&remainder
, &orig_node_count
);
4862 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4863 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4864 for (cgraph_edge
*cs
= orig_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4865 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4869 dump_profile_updates (new_node
, true);
4870 dump_profile_updates (orig_node
, false);
4874 /* Update the respective profile of specialized NEW_NODE and the original
4875 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4876 have been redirected to the specialized version. */
4879 update_specialized_profile (struct cgraph_node
*new_node
,
4880 struct cgraph_node
*orig_node
,
4881 profile_count redirected_sum
)
4883 struct cgraph_edge
*cs
;
4884 profile_count new_node_count
, orig_node_count
= orig_node
->count
;
4888 fprintf (dump_file
, " the sum of counts of redirected edges is ");
4889 redirected_sum
.dump (dump_file
);
4890 fprintf (dump_file
, "\n");
4892 if (!(orig_node_count
> profile_count::zero ()))
4895 gcc_assert (orig_node_count
>= redirected_sum
);
4897 new_node_count
= new_node
->count
;
4898 new_node
->count
+= redirected_sum
;
4899 orig_node
->count
-= redirected_sum
;
4901 for (cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4902 cs
->count
+= cs
->count
.apply_scale (redirected_sum
, new_node_count
);
4904 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4906 profile_count dec
= cs
->count
.apply_scale (redirected_sum
,
4913 dump_profile_updates (new_node
, true);
4914 dump_profile_updates (orig_node
, false);
4918 static void adjust_references_in_caller (cgraph_edge
*cs
,
4919 symtab_node
*symbol
, int index
);
4921 /* Simple structure to pass a symbol and index (with same meaning as parameters
4922 of adjust_references_in_caller) through a void* parameter of a
4923 call_for_symbol_thunks_and_aliases callback. */
4924 struct symbol_and_index_together
4926 symtab_node
*symbol
;
4930 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4931 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4933 adjust_refs_in_act_callers (struct cgraph_node
*node
, void *data
)
4935 symbol_and_index_together
*pack
= (symbol_and_index_together
*) data
;
4936 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4937 if (!cs
->caller
->thunk
)
4938 adjust_references_in_caller (cs
, pack
->symbol
, pack
->index
);
4942 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4943 variable which is only dereferenced and which is represented by SYMBOL. See
4944 if we can remove ADDR reference in callers assosiated witht the call. */
4947 adjust_references_in_caller (cgraph_edge
*cs
, symtab_node
*symbol
, int index
)
4949 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
4950 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, index
);
4951 if (jfunc
->type
== IPA_JF_CONST
)
4953 ipa_ref
*to_del
= cs
->caller
->find_reference (symbol
, cs
->call_stmt
,
4957 to_del
->remove_reference ();
4959 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
4960 cs
->caller
->dump_name (), symbol
->dump_name ());
4964 if (jfunc
->type
!= IPA_JF_PASS_THROUGH
4965 || ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
4968 int fidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
4969 cgraph_node
*caller
= cs
->caller
;
4970 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (caller
);
4971 /* TODO: This consistency check may be too big and not really
4972 that useful. Consider removing it. */
4974 if (caller_info
->ipcp_orig_node
)
4975 cst
= caller_info
->known_csts
[fidx
];
4978 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (caller_info
, fidx
);
4979 gcc_assert (lat
->is_single_const ());
4980 cst
= lat
->values
->value
;
4982 gcc_assert (TREE_CODE (cst
) == ADDR_EXPR
4983 && (symtab_node::get (get_base_address (TREE_OPERAND (cst
, 0)))
4986 int cuses
= ipa_get_controlled_uses (caller_info
, fidx
);
4987 if (cuses
== IPA_UNDESCRIBED_USE
)
4989 gcc_assert (cuses
> 0);
4991 ipa_set_controlled_uses (caller_info
, fidx
, cuses
);
4995 if (caller_info
->ipcp_orig_node
)
4997 /* Cloning machinery has created a reference here, we need to either
4998 remove it or change it to a read one. */
4999 ipa_ref
*to_del
= caller
->find_reference (symbol
, NULL
, 0);
5000 if (to_del
&& to_del
->use
== IPA_REF_ADDR
)
5002 to_del
->remove_reference ();
5004 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5005 cs
->caller
->dump_name (), symbol
->dump_name ());
5006 if (ipa_get_param_load_dereferenced (caller_info
, fidx
))
5008 caller
->create_reference (symbol
, IPA_REF_LOAD
, NULL
);
5011 " ...and replaced it with LOAD one.\n");
5016 symbol_and_index_together pack
;
5017 pack
.symbol
= symbol
;
5019 if (caller
->can_change_signature
)
5020 caller
->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers
,
5025 /* Return true if we would like to remove a parameter from NODE when cloning it
5026 with KNOWN_CSTS scalar constants. */
5029 want_remove_some_param_p (cgraph_node
*node
, vec
<tree
> known_csts
)
5031 auto_vec
<bool, 16> surviving
;
5032 bool filled_vec
= false;
5033 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5034 int i
, count
= ipa_get_param_count (info
);
5036 for (i
= 0; i
< count
; i
++)
5038 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5043 clone_info
*info
= clone_info::get (node
);
5044 if (!info
|| !info
->param_adjustments
)
5046 info
->param_adjustments
->get_surviving_params (&surviving
);
5049 if (surviving
.length() < (unsigned) i
&& surviving
[i
])
5055 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5056 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5057 redirect all edges in CALLERS to it. */
5059 static struct cgraph_node
*
5060 create_specialized_node (struct cgraph_node
*node
,
5061 vec
<tree
> known_csts
,
5062 vec
<ipa_polymorphic_call_context
> known_contexts
,
5063 struct ipa_agg_replacement_value
*aggvals
,
5064 vec
<cgraph_edge
*> &callers
)
5066 ipa_node_params
*new_info
, *info
= ipa_node_params_sum
->get (node
);
5067 vec
<ipa_replace_map
*, va_gc
> *replace_trees
= NULL
;
5068 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
5069 struct ipa_agg_replacement_value
*av
;
5070 struct cgraph_node
*new_node
;
5071 int i
, count
= ipa_get_param_count (info
);
5072 clone_info
*cinfo
= clone_info::get (node
);
5073 ipa_param_adjustments
*old_adjustments
= cinfo
5074 ? cinfo
->param_adjustments
: NULL
;
5075 ipa_param_adjustments
*new_adjustments
;
5076 gcc_assert (!info
->ipcp_orig_node
);
5077 gcc_assert (node
->can_change_signature
5078 || !old_adjustments
);
5080 if (old_adjustments
)
5082 /* At the moment all IPA optimizations should use the number of
5083 parameters of the prevailing decl as the m_always_copy_start.
5084 Handling any other value would complicate the code below, so for the
5085 time bing let's only assert it is so. */
5086 gcc_assert (old_adjustments
->m_always_copy_start
== count
5087 || old_adjustments
->m_always_copy_start
< 0);
5088 int old_adj_count
= vec_safe_length (old_adjustments
->m_adj_params
);
5089 for (i
= 0; i
< old_adj_count
; i
++)
5091 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
5092 if (!node
->can_change_signature
5093 || old_adj
->op
!= IPA_PARAM_OP_COPY
5094 || (!known_csts
[old_adj
->base_index
]
5095 && ipa_is_param_used (info
, old_adj
->base_index
)))
5097 ipa_adjusted_param new_adj
= *old_adj
;
5099 new_adj
.prev_clone_adjustment
= true;
5100 new_adj
.prev_clone_index
= i
;
5101 vec_safe_push (new_params
, new_adj
);
5104 bool skip_return
= old_adjustments
->m_skip_return
;
5105 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5106 ipa_param_adjustments (new_params
, count
,
5109 else if (node
->can_change_signature
5110 && want_remove_some_param_p (node
, known_csts
))
5112 ipa_adjusted_param adj
;
5113 memset (&adj
, 0, sizeof (adj
));
5114 adj
.op
= IPA_PARAM_OP_COPY
;
5115 for (i
= 0; i
< count
; i
++)
5116 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5119 adj
.prev_clone_index
= i
;
5120 vec_safe_push (new_params
, adj
);
5122 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5123 ipa_param_adjustments (new_params
, count
, false));
5126 new_adjustments
= NULL
;
5128 auto_vec
<cgraph_edge
*, 2> self_recursive_calls
;
5129 for (i
= callers
.length () - 1; i
>= 0; i
--)
5131 cgraph_edge
*cs
= callers
[i
];
5132 if (cs
->caller
== node
)
5134 self_recursive_calls
.safe_push (cs
);
5135 callers
.unordered_remove (i
);
5138 replace_trees
= cinfo
? vec_safe_copy (cinfo
->tree_map
) : NULL
;
5139 for (i
= 0; i
< count
; i
++)
5141 tree t
= known_csts
[i
];
5145 gcc_checking_assert (TREE_CODE (t
) != TREE_BINFO
);
5147 bool load_ref
= false;
5148 symtab_node
*ref_symbol
;
5149 if (TREE_CODE (t
) == ADDR_EXPR
)
5151 tree base
= get_base_address (TREE_OPERAND (t
, 0));
5152 if (TREE_CODE (base
) == VAR_DECL
5153 && ipa_get_controlled_uses (info
, i
) == 0
5154 && ipa_get_param_load_dereferenced (info
, i
)
5155 && (ref_symbol
= symtab_node::get (base
)))
5158 if (node
->can_change_signature
)
5159 for (cgraph_edge
*caller
: callers
)
5160 adjust_references_in_caller (caller
, ref_symbol
, i
);
5164 ipa_replace_map
*replace_map
= get_replacement_map (info
, t
, i
, load_ref
);
5166 vec_safe_push (replace_trees
, replace_map
);
5169 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
5170 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5172 new_node
= node
->create_virtual_clone (callers
, replace_trees
,
5173 new_adjustments
, "constprop",
5177 bool have_self_recursive_calls
= !self_recursive_calls
.is_empty ();
5178 for (unsigned j
= 0; j
< self_recursive_calls
.length (); j
++)
5180 cgraph_edge
*cs
= get_next_cgraph_edge_clone (self_recursive_calls
[j
]);
5181 /* Cloned edges can disappear during cloning as speculation can be
5182 resolved, check that we have one and that it comes from the last
5184 if (cs
&& cs
->caller
== new_node
)
5185 cs
->redirect_callee_duplicating_thunks (new_node
);
5186 /* Any future code that would make more than one clone of an outgoing
5187 edge would confuse this mechanism, so let's check that does not
5189 gcc_checking_assert (!cs
5190 || !get_next_cgraph_edge_clone (cs
)
5191 || get_next_cgraph_edge_clone (cs
)->caller
!= new_node
);
5193 if (have_self_recursive_calls
)
5194 new_node
->expand_all_artificial_thunks ();
5196 ipa_set_node_agg_value_chain (new_node
, aggvals
);
5197 for (av
= aggvals
; av
; av
= av
->next
)
5198 new_node
->maybe_create_reference (av
->value
, NULL
);
5200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5202 fprintf (dump_file
, " the new node is %s.\n", new_node
->dump_name ());
5203 if (known_contexts
.exists ())
5205 for (i
= 0; i
< count
; i
++)
5206 if (!known_contexts
[i
].useless_p ())
5208 fprintf (dump_file
, " known ctx %i is ", i
);
5209 known_contexts
[i
].dump (dump_file
);
5213 ipa_dump_agg_replacement_values (dump_file
, aggvals
);
5216 new_info
= ipa_node_params_sum
->get (new_node
);
5217 new_info
->ipcp_orig_node
= node
;
5218 new_node
->ipcp_clone
= true;
5219 new_info
->known_csts
= known_csts
;
5220 new_info
->known_contexts
= known_contexts
;
5222 ipcp_discover_new_direct_edges (new_node
, known_csts
, known_contexts
, aggvals
);
5227 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5228 pass-through function to itself when the cgraph_node involved is not an
5229 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5230 no-operation pass-through. */
5233 self_recursive_pass_through_p (cgraph_edge
*cs
, ipa_jump_func
*jfunc
, int i
,
5236 enum availability availability
;
5237 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5238 && availability
> AVAIL_INTERPOSABLE
5239 && jfunc
->type
== IPA_JF_PASS_THROUGH
5240 && (!simple
|| ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5241 && ipa_get_jf_pass_through_formal_id (jfunc
) == i
5242 && ipa_node_params_sum
->get (cs
->caller
)
5243 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5248 /* Return true if JFUNC, which describes a part of an aggregate represented or
5249 pointed to by the i-th parameter of call CS, is a pass-through function to
5250 itself when the cgraph_node involved is not an IPA-CP clone.. When
5251 SIMPLE is true, further check if JFUNC is a simple no-operation
5255 self_recursive_agg_pass_through_p (cgraph_edge
*cs
, ipa_agg_jf_item
*jfunc
,
5256 int i
, bool simple
= true)
5258 enum availability availability
;
5259 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5260 && availability
> AVAIL_INTERPOSABLE
5261 && jfunc
->jftype
== IPA_JF_LOAD_AGG
5262 && jfunc
->offset
== jfunc
->value
.load_agg
.offset
5263 && (!simple
|| jfunc
->value
.pass_through
.operation
== NOP_EXPR
)
5264 && jfunc
->value
.pass_through
.formal_id
== i
5265 && useless_type_conversion_p (jfunc
->value
.load_agg
.type
, jfunc
->type
)
5266 && ipa_node_params_sum
->get (cs
->caller
)
5267 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5272 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5273 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5276 find_more_scalar_values_for_callers_subset (struct cgraph_node
*node
,
5277 vec
<tree
> &known_csts
,
5278 const vec
<cgraph_edge
*> &callers
)
5280 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5281 int i
, count
= ipa_get_param_count (info
);
5283 for (i
= 0; i
< count
; i
++)
5285 struct cgraph_edge
*cs
;
5286 tree newval
= NULL_TREE
;
5289 tree type
= ipa_get_type (info
, i
);
5291 if (ipa_get_scalar_lat (info
, i
)->bottom
|| known_csts
[i
])
5294 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5296 struct ipa_jump_func
*jump_func
;
5299 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5301 || i
>= ipa_get_cs_argument_count (args
)
5303 && call_passes_through_thunk (cs
)))
5308 jump_func
= ipa_get_ith_jump_func (args
, i
);
5310 /* Besides simple pass-through jump function, arithmetic jump
5311 function could also introduce argument-direct-pass-through for
5312 self-feeding recursive call. For example,
5319 Given that i is 0, recursive propagation via (i & 1) also gets
5321 if (self_recursive_pass_through_p (cs
, jump_func
, i
, false))
5323 gcc_assert (newval
);
5324 t
= ipa_get_jf_arith_result (
5325 ipa_get_jf_pass_through_operation (jump_func
),
5327 ipa_get_jf_pass_through_operand (jump_func
),
5331 t
= ipa_value_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5335 && !values_equal_for_ipcp_p (t
, newval
))
5336 || (!first
&& !newval
))
5348 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5350 fprintf (dump_file
, " adding an extra known scalar value ");
5351 print_ipcp_constant_value (dump_file
, newval
);
5352 fprintf (dump_file
, " for ");
5353 ipa_dump_param (dump_file
, info
, i
);
5354 fprintf (dump_file
, "\n");
5357 known_csts
[i
] = newval
;
5362 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5363 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5367 find_more_contexts_for_caller_subset (cgraph_node
*node
,
5368 vec
<ipa_polymorphic_call_context
>
5370 const vec
<cgraph_edge
*> &callers
)
5372 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5373 int i
, count
= ipa_get_param_count (info
);
5375 for (i
= 0; i
< count
; i
++)
5379 if (ipa_get_poly_ctx_lat (info
, i
)->bottom
5380 || (known_contexts
->exists ()
5381 && !(*known_contexts
)[i
].useless_p ()))
5384 ipa_polymorphic_call_context newval
;
5388 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5390 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5392 || i
>= ipa_get_cs_argument_count (args
))
5394 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
5395 ipa_polymorphic_call_context ctx
;
5396 ctx
= ipa_context_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5404 newval
.meet_with (ctx
);
5405 if (newval
.useless_p ())
5409 if (!newval
.useless_p ())
5411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5413 fprintf (dump_file
, " adding an extra known polymorphic "
5415 print_ipcp_constant_value (dump_file
, newval
);
5416 fprintf (dump_file
, " for ");
5417 ipa_dump_param (dump_file
, info
, i
);
5418 fprintf (dump_file
, "\n");
5421 if (!known_contexts
->exists ())
5422 known_contexts
->safe_grow_cleared (ipa_get_param_count (info
),
5424 (*known_contexts
)[i
] = newval
;
5430 /* Go through PLATS and create a vector of values consisting of values and
5431 offsets (minus OFFSET) of lattices that contain only a single value. */
5433 static vec
<ipa_agg_value
>
5434 copy_plats_to_inter (class ipcp_param_lattices
*plats
, HOST_WIDE_INT offset
)
5436 vec
<ipa_agg_value
> res
= vNULL
;
5438 if (!plats
->aggs
|| plats
->aggs_contain_variable
|| plats
->aggs_bottom
)
5441 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
5442 if (aglat
->is_single_const ())
5444 struct ipa_agg_value ti
;
5445 ti
.offset
= aglat
->offset
- offset
;
5446 ti
.value
= aglat
->values
->value
;
5452 /* Intersect all values in INTER with single value lattices in PLATS (while
5453 subtracting OFFSET). */
5456 intersect_with_plats (class ipcp_param_lattices
*plats
,
5457 vec
<ipa_agg_value
> *inter
,
5458 HOST_WIDE_INT offset
)
5460 struct ipcp_agg_lattice
*aglat
;
5461 struct ipa_agg_value
*item
;
5464 if (!plats
->aggs
|| plats
->aggs_contain_variable
|| plats
->aggs_bottom
)
5470 aglat
= plats
->aggs
;
5471 FOR_EACH_VEC_ELT (*inter
, k
, item
)
5478 if (aglat
->offset
- offset
> item
->offset
)
5480 if (aglat
->offset
- offset
== item
->offset
)
5482 if (aglat
->is_single_const ())
5484 tree value
= aglat
->values
->value
;
5486 if (values_equal_for_ipcp_p (item
->value
, value
))
5491 aglat
= aglat
->next
;
5494 item
->value
= NULL_TREE
;
5498 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5499 vector result while subtracting OFFSET from the individual value offsets. */
5501 static vec
<ipa_agg_value
>
5502 agg_replacements_to_vector (struct cgraph_node
*node
, int index
,
5503 HOST_WIDE_INT offset
)
5505 struct ipa_agg_replacement_value
*av
;
5506 vec
<ipa_agg_value
> res
= vNULL
;
5508 for (av
= ipa_get_agg_replacements_for_node (node
); av
; av
= av
->next
)
5509 if (av
->index
== index
5510 && (av
->offset
- offset
) >= 0)
5512 struct ipa_agg_value item
;
5513 gcc_checking_assert (av
->value
);
5514 item
.offset
= av
->offset
- offset
;
5515 item
.value
= av
->value
;
5516 res
.safe_push (item
);
5522 /* Intersect all values in INTER with those that we have already scheduled to
5523 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5524 (while subtracting OFFSET). */
5527 intersect_with_agg_replacements (struct cgraph_node
*node
, int index
,
5528 vec
<ipa_agg_value
> *inter
,
5529 HOST_WIDE_INT offset
)
5531 struct ipa_agg_replacement_value
*srcvals
;
5532 struct ipa_agg_value
*item
;
5535 srcvals
= ipa_get_agg_replacements_for_node (node
);
5542 FOR_EACH_VEC_ELT (*inter
, i
, item
)
5544 struct ipa_agg_replacement_value
*av
;
5548 for (av
= srcvals
; av
; av
= av
->next
)
5550 gcc_checking_assert (av
->value
);
5551 if (av
->index
== index
5552 && av
->offset
- offset
== item
->offset
)
5554 if (values_equal_for_ipcp_p (item
->value
, av
->value
))
5560 item
->value
= NULL_TREE
;
5564 /* Intersect values in INTER with aggregate values that come along edge CS to
5565 parameter number INDEX and return it. If INTER does not actually exist yet,
5566 copy all incoming values to it. If we determine we ended up with no values
5567 whatsoever, return a released vector. */
5569 static vec
<ipa_agg_value
>
5570 intersect_aggregates_with_edge (struct cgraph_edge
*cs
, int index
,
5571 vec
<ipa_agg_value
> inter
)
5573 struct ipa_jump_func
*jfunc
;
5574 jfunc
= ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
), index
);
5575 if (jfunc
->type
== IPA_JF_PASS_THROUGH
5576 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5578 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5579 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5581 if (caller_info
->ipcp_orig_node
)
5583 struct cgraph_node
*orig_node
= caller_info
->ipcp_orig_node
;
5584 class ipcp_param_lattices
*orig_plats
;
5585 ipa_node_params
*orig_info
= ipa_node_params_sum
->get (orig_node
);
5586 orig_plats
= ipa_get_parm_lattices (orig_info
, src_idx
);
5587 if (agg_pass_through_permissible_p (orig_plats
, jfunc
))
5589 if (!inter
.exists ())
5590 inter
= agg_replacements_to_vector (cs
->caller
, src_idx
, 0);
5592 intersect_with_agg_replacements (cs
->caller
, src_idx
,
5599 class ipcp_param_lattices
*src_plats
;
5600 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
5601 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
5603 /* Currently we do not produce clobber aggregate jump
5604 functions, adjust when we do. */
5605 gcc_checking_assert (!jfunc
->agg
.items
);
5606 if (!inter
.exists ())
5607 inter
= copy_plats_to_inter (src_plats
, 0);
5609 intersect_with_plats (src_plats
, &inter
, 0);
5614 else if (jfunc
->type
== IPA_JF_ANCESTOR
5615 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
5617 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5618 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
5619 class ipcp_param_lattices
*src_plats
;
5620 HOST_WIDE_INT delta
= ipa_get_jf_ancestor_offset (jfunc
);
5622 if (caller_info
->ipcp_orig_node
)
5624 if (!inter
.exists ())
5625 inter
= agg_replacements_to_vector (cs
->caller
, src_idx
, delta
);
5627 intersect_with_agg_replacements (cs
->caller
, src_idx
, &inter
,
5632 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
5633 /* Currently we do not produce clobber aggregate jump
5634 functions, adjust when we do. */
5635 gcc_checking_assert (!src_plats
->aggs
|| !jfunc
->agg
.items
);
5636 if (!inter
.exists ())
5637 inter
= copy_plats_to_inter (src_plats
, delta
);
5639 intersect_with_plats (src_plats
, &inter
, delta
);
5644 if (jfunc
->agg
.items
)
5646 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5647 struct ipa_agg_value
*item
;
5650 if (!inter
.exists ())
5651 for (unsigned i
= 0; i
< jfunc
->agg
.items
->length (); i
++)
5653 struct ipa_agg_jf_item
*agg_item
= &(*jfunc
->agg
.items
)[i
];
5654 tree value
= ipa_agg_value_from_node (caller_info
, cs
->caller
,
5658 struct ipa_agg_value agg_value
;
5660 agg_value
.value
= value
;
5661 agg_value
.offset
= agg_item
->offset
;
5662 inter
.safe_push (agg_value
);
5666 FOR_EACH_VEC_ELT (inter
, k
, item
)
5674 while ((unsigned) l
< jfunc
->agg
.items
->length ())
5676 struct ipa_agg_jf_item
*ti
;
5677 ti
= &(*jfunc
->agg
.items
)[l
];
5678 if (ti
->offset
> item
->offset
)
5680 if (ti
->offset
== item
->offset
)
5684 /* Besides simple pass-through aggregate jump function,
5685 arithmetic aggregate jump function could also bring
5686 same aggregate value as parameter passed-in for
5687 self-feeding recursive call. For example,
5695 Given that *i is 0, recursive propagation via (*i & 1)
5697 if (self_recursive_agg_pass_through_p (cs
, ti
, index
,
5699 value
= ipa_get_jf_arith_result (
5700 ti
->value
.pass_through
.operation
,
5702 ti
->value
.pass_through
.operand
,
5705 value
= ipa_agg_value_from_node (caller_info
,
5708 if (value
&& values_equal_for_ipcp_p (item
->value
, value
))
5726 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5727 from all of them. */
5729 static struct ipa_agg_replacement_value
*
5730 find_aggregate_values_for_callers_subset (struct cgraph_node
*node
,
5731 const vec
<cgraph_edge
*> &callers
)
5733 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5734 struct ipa_agg_replacement_value
*res
;
5735 struct ipa_agg_replacement_value
**tail
= &res
;
5736 struct cgraph_edge
*cs
;
5737 int i
, j
, count
= ipa_get_param_count (dest_info
);
5739 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5741 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5747 int c
= ipa_get_cs_argument_count (args
);
5752 for (i
= 0; i
< count
; i
++)
5754 struct cgraph_edge
*cs
;
5755 vec
<ipa_agg_value
> inter
= vNULL
;
5756 struct ipa_agg_value
*item
;
5757 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (dest_info
, i
);
5760 /* Among other things, the following check should deal with all by_ref
5762 if (plats
->aggs_bottom
)
5765 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5767 struct ipa_jump_func
*jfunc
5768 = ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
), i
);
5769 if (self_recursive_pass_through_p (cs
, jfunc
, i
)
5770 && (!plats
->aggs_by_ref
5771 || ipa_get_jf_pass_through_agg_preserved (jfunc
)))
5773 inter
= intersect_aggregates_with_edge (cs
, i
, inter
);
5775 if (!inter
.exists ())
5779 FOR_EACH_VEC_ELT (inter
, j
, item
)
5781 struct ipa_agg_replacement_value
*v
;
5786 v
= ggc_alloc
<ipa_agg_replacement_value
> ();
5788 v
->offset
= item
->offset
;
5789 v
->value
= item
->value
;
5790 v
->by_ref
= plats
->aggs_by_ref
;
5796 if (inter
.exists ())
5803 /* Determine whether CS also brings all scalar values that the NODE is
5807 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge
*cs
,
5808 struct cgraph_node
*node
)
5810 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5811 int count
= ipa_get_param_count (dest_info
);
5812 class ipa_node_params
*caller_info
;
5813 class ipa_edge_args
*args
;
5816 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5817 args
= ipa_edge_args_sum
->get (cs
);
5818 for (i
= 0; i
< count
; i
++)
5820 struct ipa_jump_func
*jump_func
;
5823 val
= dest_info
->known_csts
[i
];
5827 if (i
>= ipa_get_cs_argument_count (args
))
5829 jump_func
= ipa_get_ith_jump_func (args
, i
);
5830 t
= ipa_value_from_jfunc (caller_info
, jump_func
,
5831 ipa_get_type (dest_info
, i
));
5832 if (!t
|| !values_equal_for_ipcp_p (val
, t
))
5838 /* Determine whether CS also brings all aggregate values that NODE is
5841 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge
*cs
,
5842 struct cgraph_node
*node
)
5844 struct ipa_agg_replacement_value
*aggval
;
5847 aggval
= ipa_get_agg_replacements_for_node (node
);
5851 ipa_node_params
*clone_node_info
= ipa_node_params_sum
->get (node
);
5852 count
= ipa_get_param_count (clone_node_info
);
5853 ec
= ipa_get_cs_argument_count (ipa_edge_args_sum
->get (cs
));
5855 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5856 if (aggval
->index
>= ec
)
5859 ipa_node_params
*orig_node_info
5860 = ipa_node_params_sum
->get (clone_node_info
->ipcp_orig_node
);
5862 for (i
= 0; i
< count
; i
++)
5864 class ipcp_param_lattices
*plats
;
5865 bool interesting
= false;
5866 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5867 if (aggval
->index
== i
)
5875 plats
= ipa_get_parm_lattices (orig_node_info
, aggval
->index
);
5876 if (plats
->aggs_bottom
)
5879 vec
<ipa_agg_value
> values
= intersect_aggregates_with_edge (cs
, i
, vNULL
);
5880 if (!values
.exists ())
5883 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5884 if (aggval
->index
== i
)
5886 struct ipa_agg_value
*item
;
5889 FOR_EACH_VEC_ELT (values
, j
, item
)
5891 && item
->offset
== av
->offset
5892 && values_equal_for_ipcp_p (item
->value
, av
->value
))
5908 /* Given an original NODE and a VAL for which we have already created a
5909 specialized clone, look whether there are incoming edges that still lead
5910 into the old node but now also bring the requested value and also conform to
5911 all other criteria such that they can be redirected the special node.
5912 This function can therefore redirect the final edge in a SCC. */
5914 template <typename valtype
>
5916 perhaps_add_new_callers (cgraph_node
*node
, ipcp_value
<valtype
> *val
)
5918 ipcp_value_source
<valtype
> *src
;
5919 profile_count redirected_sum
= profile_count::zero ();
5921 for (src
= val
->sources
; src
; src
= src
->next
)
5923 struct cgraph_edge
*cs
= src
->cs
;
5926 if (cgraph_edge_brings_value_p (cs
, src
, node
, val
)
5927 && cgraph_edge_brings_all_scalars_for_node (cs
, val
->spec_node
)
5928 && cgraph_edge_brings_all_agg_vals_for_node (cs
, val
->spec_node
))
5931 fprintf (dump_file
, " - adding an extra caller %s of %s\n",
5932 cs
->caller
->dump_name (),
5933 val
->spec_node
->dump_name ());
5935 cs
->redirect_callee_duplicating_thunks (val
->spec_node
);
5936 val
->spec_node
->expand_all_artificial_thunks ();
5937 if (cs
->count
.ipa ().initialized_p ())
5938 redirected_sum
= redirected_sum
+ cs
->count
.ipa ();
5940 cs
= get_next_cgraph_edge_clone (cs
);
5944 if (redirected_sum
.nonzero_p ())
5945 update_specialized_profile (val
->spec_node
, node
, redirected_sum
);
5948 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5951 known_contexts_useful_p (vec
<ipa_polymorphic_call_context
> known_contexts
)
5953 ipa_polymorphic_call_context
*ctx
;
5956 FOR_EACH_VEC_ELT (known_contexts
, i
, ctx
)
5957 if (!ctx
->useless_p ())
5962 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5964 static vec
<ipa_polymorphic_call_context
>
5965 copy_useful_known_contexts (const vec
<ipa_polymorphic_call_context
> &known_contexts
)
5967 if (known_contexts_useful_p (known_contexts
))
5968 return known_contexts
.copy ();
5973 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5974 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5978 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5979 vec
<tree
> *known_csts
,
5980 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5981 ipcp_value
<tree
> *val
, int index
)
5983 *known_csts
= avals
->m_known_vals
.copy ();
5984 *known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
5985 (*known_csts
)[index
] = val
->value
;
5988 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5989 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5993 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5994 vec
<tree
> *known_csts
,
5995 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5996 ipcp_value
<ipa_polymorphic_call_context
> *val
,
5999 *known_csts
= avals
->m_known_vals
.copy ();
6000 *known_contexts
= avals
->m_known_contexts
.copy ();
6001 (*known_contexts
)[index
] = val
->value
;
6004 /* Return true if OFFSET indicates this was not an aggregate value or there is
6005 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6009 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value
*aggvals
,
6010 int index
, HOST_WIDE_INT offset
, tree value
)
6017 if (aggvals
->index
== index
6018 && aggvals
->offset
== offset
6019 && values_equal_for_ipcp_p (aggvals
->value
, value
))
6021 aggvals
= aggvals
->next
;
6026 /* Return true if offset is minus one because source of a polymorphic context
6027 cannot be an aggregate value. */
6030 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value
*,
6031 int , HOST_WIDE_INT offset
,
6032 ipa_polymorphic_call_context
)
6034 return offset
== -1;
6037 /* Decide whether to create a special version of NODE for value VAL of
6038 parameter at the given INDEX. If OFFSET is -1, the value is for the
6039 parameter itself, otherwise it is stored at the given OFFSET of the
6040 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6041 is a vector which contains clones created for self-recursive calls with an
6042 arithmetic pass-through jump function. */
6044 template <typename valtype
>
6046 decide_about_value (struct cgraph_node
*node
, int index
, HOST_WIDE_INT offset
,
6047 ipcp_value
<valtype
> *val
, ipa_auto_call_arg_values
*avals
,
6048 vec
<cgraph_node
*> *self_gen_clones
)
6050 struct ipa_agg_replacement_value
*aggvals
;
6053 profile_count count_sum
, rec_count_sum
;
6054 vec
<cgraph_edge
*> callers
;
6058 perhaps_add_new_callers (node
, val
);
6061 else if (val
->local_size_cost
+ overall_size
> get_max_overall_size (node
))
6063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6064 fprintf (dump_file
, " Ignoring candidate value because "
6065 "maximum unit size would be reached with %li.\n",
6066 val
->local_size_cost
+ overall_size
);
6069 else if (!get_info_about_necessary_edges (val
, node
, &freq_sum
, &caller_count
,
6070 &rec_count_sum
, &count_sum
))
6073 if (!dbg_cnt (ipa_cp_values
))
6076 if (val
->self_recursion_generated_p ())
6078 /* The edge counts in this case might not have been adjusted yet.
6079 Nevertleless, even if they were it would be only a guesswork which we
6080 can do now. The recursive part of the counts can be derived from the
6081 count of the original node anyway. */
6082 if (node
->count
.ipa ().nonzero_p ())
6084 unsigned dem
= self_gen_clones
->length () + 1;
6085 rec_count_sum
= node
->count
.ipa () / dem
;
6088 rec_count_sum
= profile_count::zero ();
6091 /* get_info_about_necessary_edges only sums up ipa counts. */
6092 count_sum
+= rec_count_sum
;
6094 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6096 fprintf (dump_file
, " - considering value ");
6097 print_ipcp_constant_value (dump_file
, val
->value
);
6098 fprintf (dump_file
, " for ");
6099 ipa_dump_param (dump_file
, ipa_node_params_sum
->get (node
), index
);
6101 fprintf (dump_file
, ", offset: " HOST_WIDE_INT_PRINT_DEC
, offset
);
6102 fprintf (dump_file
, " (caller_count: %i)\n", caller_count
);
6105 if (!good_cloning_opportunity_p (node
, val
->local_time_benefit
,
6106 freq_sum
, count_sum
,
6107 val
->local_size_cost
)
6108 && !good_cloning_opportunity_p (node
, val
->prop_time_benefit
,
6109 freq_sum
, count_sum
, val
->prop_size_cost
))
6113 fprintf (dump_file
, " Creating a specialized node of %s.\n",
6114 node
->dump_name ());
6116 vec
<tree
> known_csts
;
6117 vec
<ipa_polymorphic_call_context
> known_contexts
;
6119 callers
= gather_edges_for_value (val
, node
, caller_count
);
6121 copy_known_vectors_add_val (avals
, &known_csts
, &known_contexts
, val
, index
);
6124 known_csts
= avals
->m_known_vals
.copy ();
6125 known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
6127 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6128 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6129 aggvals
= find_aggregate_values_for_callers_subset (node
, callers
);
6130 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals
, index
,
6131 offset
, val
->value
));
6132 val
->spec_node
= create_specialized_node (node
, known_csts
, known_contexts
,
6135 if (val
->self_recursion_generated_p ())
6136 self_gen_clones
->safe_push (val
->spec_node
);
6138 update_profiling_info (node
, val
->spec_node
);
6141 overall_size
+= val
->local_size_cost
;
6142 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6143 fprintf (dump_file
, " overall size reached %li\n",
6146 /* TODO: If for some lattice there is only one other known value
6147 left, make a special node for it too. */
6152 /* Decide whether and what specialized clones of NODE should be created. */
6155 decide_whether_version_node (struct cgraph_node
*node
)
6157 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6158 int i
, count
= ipa_get_param_count (info
);
6164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6165 fprintf (dump_file
, "\nEvaluating opportunities for %s.\n",
6166 node
->dump_name ());
6168 auto_vec
<cgraph_node
*, 9> self_gen_clones
;
6169 ipa_auto_call_arg_values avals
;
6170 gather_context_independent_values (info
, &avals
, false, NULL
);
6172 for (i
= 0; i
< count
;i
++)
6174 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6175 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
6176 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
6179 && !avals
.m_known_vals
[i
])
6181 ipcp_value
<tree
> *val
;
6182 for (val
= lat
->values
; val
; val
= val
->next
)
6184 /* If some values generated for self-recursive calls with
6185 arithmetic jump functions fall outside of the known
6186 value_range for the parameter, we can skip them. VR interface
6187 supports this only for integers now. */
6188 if (TREE_CODE (val
->value
) == INTEGER_CST
6189 && !plats
->m_value_range
.bottom_p ()
6190 && !plats
->m_value_range
.m_vr
.contains_p (val
->value
))
6192 /* This can happen also if a constant present in the source
6193 code falls outside of the range of parameter's type, so we
6195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6197 fprintf (dump_file
, " - skipping%s value ",
6198 val
->self_recursion_generated_p ()
6199 ? " self_recursion_generated" : "");
6200 print_ipcp_constant_value (dump_file
, val
->value
);
6201 fprintf (dump_file
, " because it is outside known "
6206 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6211 if (!plats
->aggs_bottom
)
6213 struct ipcp_agg_lattice
*aglat
;
6214 ipcp_value
<tree
> *val
;
6215 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
6216 if (!aglat
->bottom
&& aglat
->values
6217 /* If the following is false, the one value has been considered
6218 for cloning for all contexts. */
6219 && (plats
->aggs_contain_variable
6220 || !aglat
->is_single_const ()))
6221 for (val
= aglat
->values
; val
; val
= val
->next
)
6222 ret
|= decide_about_value (node
, i
, aglat
->offset
, val
, &avals
,
6227 && avals
.m_known_contexts
[i
].useless_p ())
6229 ipcp_value
<ipa_polymorphic_call_context
> *val
;
6230 for (val
= ctxlat
->values
; val
; val
= val
->next
)
6231 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6236 if (!self_gen_clones
.is_empty ())
6238 self_gen_clones
.safe_push (node
);
6239 update_counts_for_self_gen_clones (node
, self_gen_clones
);
6242 if (info
->do_clone_for_all_contexts
)
6244 if (!dbg_cnt (ipa_cp_values
))
6246 info
->do_clone_for_all_contexts
= false;
6250 struct cgraph_node
*clone
;
6251 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
6253 for (int i
= callers
.length () - 1; i
>= 0; i
--)
6255 cgraph_edge
*cs
= callers
[i
];
6256 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6258 if (caller_info
&& caller_info
->node_dead
)
6259 callers
.unordered_remove (i
);
6262 if (!adjust_callers_for_value_intersection (callers
, node
))
6264 /* If node is not called by anyone, or all its caller edges are
6265 self-recursive, the node is not really in use, no need to do
6267 info
->do_clone_for_all_contexts
= false;
6272 fprintf (dump_file
, " - Creating a specialized node of %s "
6273 "for all known contexts.\n", node
->dump_name ());
6275 vec
<tree
> known_csts
= avals
.m_known_vals
.copy ();
6276 vec
<ipa_polymorphic_call_context
> known_contexts
6277 = copy_useful_known_contexts (avals
.m_known_contexts
);
6278 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6279 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6280 ipa_agg_replacement_value
*aggvals
6281 = find_aggregate_values_for_callers_subset (node
, callers
);
6283 if (!known_contexts_useful_p (known_contexts
))
6285 known_contexts
.release ();
6286 known_contexts
= vNULL
;
6288 clone
= create_specialized_node (node
, known_csts
, known_contexts
,
6290 info
->do_clone_for_all_contexts
= false;
6291 ipa_node_params_sum
->get (clone
)->is_all_contexts_clone
= true;
6298 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6301 spread_undeadness (struct cgraph_node
*node
)
6303 struct cgraph_edge
*cs
;
6305 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
6306 if (ipa_edge_within_scc (cs
))
6308 struct cgraph_node
*callee
;
6309 class ipa_node_params
*info
;
6311 callee
= cs
->callee
->function_symbol (NULL
);
6312 info
= ipa_node_params_sum
->get (callee
);
6314 if (info
&& info
->node_dead
)
6316 info
->node_dead
= 0;
6317 spread_undeadness (callee
);
6322 /* Return true if NODE has a caller from outside of its SCC that is not
6323 dead. Worker callback for cgraph_for_node_and_aliases. */
6326 has_undead_caller_from_outside_scc_p (struct cgraph_node
*node
,
6327 void *data ATTRIBUTE_UNUSED
)
6329 struct cgraph_edge
*cs
;
6331 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
6332 if (cs
->caller
->thunk
6333 && cs
->caller
->call_for_symbol_thunks_and_aliases
6334 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6336 else if (!ipa_edge_within_scc (cs
))
6338 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6339 if (!caller_info
/* Unoptimized caller are like dead ones. */
6340 || !caller_info
->node_dead
)
6347 /* Identify nodes within the same SCC as NODE which are no longer needed
6348 because of new clones and will be removed as unreachable. */
6351 identify_dead_nodes (struct cgraph_node
*node
)
6353 struct cgraph_node
*v
;
6354 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6357 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6359 && !v
->call_for_symbol_thunks_and_aliases
6360 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6361 info
->node_dead
= 1;
6364 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6366 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6367 if (info
&& !info
->node_dead
)
6368 spread_undeadness (v
);
6371 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6373 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6374 if (ipa_node_params_sum
->get (v
)
6375 && ipa_node_params_sum
->get (v
)->node_dead
)
6376 fprintf (dump_file
, " Marking node as dead: %s.\n",
6381 /* The decision stage. Iterate over the topological order of call graph nodes
6382 TOPO and make specialized clones if deemed beneficial. */
6385 ipcp_decision_stage (class ipa_topo_info
*topo
)
6390 fprintf (dump_file
, "\nIPA decision stage:\n\n");
6392 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
6394 struct cgraph_node
*node
= topo
->order
[i
];
6395 bool change
= false, iterate
= true;
6399 struct cgraph_node
*v
;
6401 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6402 if (v
->has_gimple_body_p ()
6403 && ipcp_versionable_function_p (v
))
6404 iterate
|= decide_whether_version_node (v
);
6409 identify_dead_nodes (node
);
6413 /* Look up all the bits information that we have discovered and copy it over
6414 to the transformation summary. */
6417 ipcp_store_bits_results (void)
6421 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6423 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6424 bool dumped_sth
= false;
6425 bool found_useful_result
= false;
6427 if (!opt_for_fn (node
->decl
, flag_ipa_bit_cp
) || !info
)
6430 fprintf (dump_file
, "Not considering %s for ipa bitwise propagation "
6431 "; -fipa-bit-cp: disabled.\n",
6432 node
->dump_name ());
6436 if (info
->ipcp_orig_node
)
6437 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6438 if (!info
->lattices
)
6439 /* Newly expanded artificial thunks do not have lattices. */
6442 unsigned count
= ipa_get_param_count (info
);
6443 for (unsigned i
= 0; i
< count
; i
++)
6445 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6446 if (plats
->bits_lattice
.constant_p ())
6448 found_useful_result
= true;
6453 if (!found_useful_result
)
6456 ipcp_transformation_initialize ();
6457 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6458 vec_safe_reserve_exact (ts
->bits
, count
);
6460 for (unsigned i
= 0; i
< count
; i
++)
6462 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6465 if (plats
->bits_lattice
.constant_p ())
6468 = ipa_get_ipa_bits_for_value (plats
->bits_lattice
.get_value (),
6469 plats
->bits_lattice
.get_mask ());
6470 if (!dbg_cnt (ipa_cp_bits
))
6476 ts
->bits
->quick_push (jfbits
);
6477 if (!dump_file
|| !jfbits
)
6481 fprintf (dump_file
, "Propagated bits info for function %s:\n",
6482 node
->dump_name ());
6485 fprintf (dump_file
, " param %i: value = ", i
);
6486 print_hex (jfbits
->value
, dump_file
);
6487 fprintf (dump_file
, ", mask = ");
6488 print_hex (jfbits
->mask
, dump_file
);
6489 fprintf (dump_file
, "\n");
6494 /* Look up all VR information that we have discovered and copy it over
6495 to the transformation summary. */
6498 ipcp_store_vr_results (void)
6502 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6504 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6505 bool found_useful_result
= false;
6507 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_vrp
))
6510 fprintf (dump_file
, "Not considering %s for VR discovery "
6511 "and propagate; -fipa-ipa-vrp: disabled.\n",
6512 node
->dump_name ());
6516 if (info
->ipcp_orig_node
)
6517 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6518 if (!info
->lattices
)
6519 /* Newly expanded artificial thunks do not have lattices. */
6522 unsigned count
= ipa_get_param_count (info
);
6523 for (unsigned i
= 0; i
< count
; i
++)
6525 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6526 if (!plats
->m_value_range
.bottom_p ()
6527 && !plats
->m_value_range
.top_p ())
6529 found_useful_result
= true;
6533 if (!found_useful_result
)
6536 ipcp_transformation_initialize ();
6537 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6538 vec_safe_reserve_exact (ts
->m_vr
, count
);
6540 for (unsigned i
= 0; i
< count
; i
++)
6542 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6545 if (!plats
->m_value_range
.bottom_p ()
6546 && !plats
->m_value_range
.top_p ()
6547 && dbg_cnt (ipa_cp_vr
))
6550 vr
.type
= plats
->m_value_range
.m_vr
.kind ();
6551 vr
.min
= wi::to_wide (plats
->m_value_range
.m_vr
.min ());
6552 vr
.max
= wi::to_wide (plats
->m_value_range
.m_vr
.max ());
6557 vr
.type
= VR_VARYING
;
6558 vr
.min
= vr
.max
= wi::zero (INT_TYPE_SIZE
);
6560 ts
->m_vr
->quick_push (vr
);
6565 /* The IPCP driver. */
6570 class ipa_topo_info topo
;
6572 if (edge_clone_summaries
== NULL
)
6573 edge_clone_summaries
= new edge_clone_summary_t (symtab
);
6575 ipa_check_create_node_params ();
6576 ipa_check_create_edge_args ();
6577 clone_num_suffixes
= new hash_map
<const char *, unsigned>;
6581 fprintf (dump_file
, "\nIPA structures before propagation:\n");
6582 if (dump_flags
& TDF_DETAILS
)
6583 ipa_print_all_params (dump_file
);
6584 ipa_print_all_jump_functions (dump_file
);
6587 /* Topological sort. */
6588 build_toporder_info (&topo
);
6589 /* Do the interprocedural propagation. */
6590 ipcp_propagate_stage (&topo
);
6591 /* Decide what constant propagation and cloning should be performed. */
6592 ipcp_decision_stage (&topo
);
6593 /* Store results of bits propagation. */
6594 ipcp_store_bits_results ();
6595 /* Store results of value range propagation. */
6596 ipcp_store_vr_results ();
6598 /* Free all IPCP structures. */
6599 delete clone_num_suffixes
;
6600 free_toporder_info (&topo
);
6601 delete edge_clone_summaries
;
6602 edge_clone_summaries
= NULL
;
6603 ipa_free_all_structures_after_ipa_cp ();
6605 fprintf (dump_file
, "\nIPA constant propagation end\n");
6609 /* Initialization and computation of IPCP data structures. This is the initial
6610 intraprocedural analysis of functions, which gathers information to be
6611 propagated later on. */
6614 ipcp_generate_summary (void)
6616 struct cgraph_node
*node
;
6619 fprintf (dump_file
, "\nIPA constant propagation start:\n");
6620 ipa_register_cgraph_hooks ();
6622 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6623 ipa_analyze_node (node
);
6628 const pass_data pass_data_ipa_cp
=
6630 IPA_PASS
, /* type */
6632 OPTGROUP_NONE
, /* optinfo_flags */
6633 TV_IPA_CONSTANT_PROP
, /* tv_id */
6634 0, /* properties_required */
6635 0, /* properties_provided */
6636 0, /* properties_destroyed */
6637 0, /* todo_flags_start */
6638 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
6641 class pass_ipa_cp
: public ipa_opt_pass_d
6644 pass_ipa_cp (gcc::context
*ctxt
)
6645 : ipa_opt_pass_d (pass_data_ipa_cp
, ctxt
,
6646 ipcp_generate_summary
, /* generate_summary */
6647 NULL
, /* write_summary */
6648 NULL
, /* read_summary */
6649 ipcp_write_transformation_summaries
, /*
6650 write_optimization_summary */
6651 ipcp_read_transformation_summaries
, /*
6652 read_optimization_summary */
6653 NULL
, /* stmt_fixup */
6654 0, /* function_transform_todo_flags_start */
6655 ipcp_transform_function
, /* function_transform */
6656 NULL
) /* variable_transform */
6659 /* opt_pass methods: */
6660 bool gate (function
*) final override
6662 /* FIXME: We should remove the optimize check after we ensure we never run
6663 IPA passes when not optimizing. */
6664 return (flag_ipa_cp
&& optimize
) || in_lto_p
;
6667 unsigned int execute (function
*) final override
{ return ipcp_driver (); }
6669 }; // class pass_ipa_cp
6674 make_pass_ipa_cp (gcc::context
*ctxt
)
6676 return new pass_ipa_cp (ctxt
);
6679 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6680 within the same process. For use by toplev::finalize. */
6683 ipa_cp_cc_finalize (void)
6685 base_count
= profile_count::uninitialized ();
6687 orig_overall_size
= 0;
6688 ipcp_free_transformation_sum ();