/* Interprocedural constant propagation
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good.
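
   As a concrete illustration of 2) - this snippet is ours, made up for this
   comment rather than taken from any testcase - given

     static int
     foo (int a)
     {
       return a + 4;
     }

     void
     bar (int n)
     {
       use (foo (7));
       use (foo (7));
       use (foo (n));
     }

   the pass may create a specialized clone of foo in which the parameter A is
   known to be 7 and the body folds to "return 11;", redirect the first two
   calls to that clone, and leave the call with the unknown argument N calling
   the original foo.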
   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.
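
   For instance, in the following made-up caller (not from any real input),
   assuming a callee foo (int, int, int):

     void
     bar (int b)
     {
       foo (b, 7, rand ());
     }

   the first argument would be described by a pass-through jump function, the
   second one by a constant jump function and the third one, being neither of
   those, by an unknown jump function.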
   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original Callahan's paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit and size cost that specializing the function for this value
     would bring about in this function alone.  */
  int local_time_benefit, local_size_cost;
  /* Time benefit and size cost that specializing the function for this value
     can bring about in its callees (transitively).  */
  int prop_time_benefit, prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), local_size_cost (0),
      prop_time_benefit (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs, low_link;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack;

  ipcp_value ()
    : sources (0), next (0), scc_next (0), topo_next (0),
      spec_node (0), dfs (0), low_link (0), on_stack (false) {}

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
class ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  bool unlimited = false);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
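
/* An illustrative walkthrough of the lattice states (ours, not part of the
   original sources): a zero-initialized lattice is TOP; add_value moves it to
   a set of known constants; set_contains_variable additionally records a
   non-constant component while keeping the constants; set_to_bottom makes the
   lattice unusable for propagation:

     ipcp_lattice<tree> lat {};        // TOP: no values, both flags clear
     lat.add_value (cst, cs);          // values = { cst }
     lat.set_contains_variable ();     // VARIABLE, values still kept
     lat.set_to_bottom ();             // BOTTOM: values must be disregarded  */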
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   parameter are constant.  For example, given

     int f (int x)
     {
       return some_op (x);
     }

     int f1 (int y)
     {
       if (cond)
         return f (y & 0xff);
       else
         return f (y & 0xf);
     }

   the param 'x' will always have all the bits (except the bits in the lsb)
   set to 0.  Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in the lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */
class ipcp_bits_lattice
{
public:
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA-CP aggregate lattices");
/* Maximal count found in program.  */

static profile_count max_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}
/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}
/* Return the lattice corresponding to the polymorphic context of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}
/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, "               ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          fprintf (f, " [from:");
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %i, loc_size: %i, "
                 "prop_time: %i, prop_size: %i]\n",
                 val->local_time_benefit, val->local_size_cost,
                 val->prop_time_benefit, val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
/* Print the bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}
/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}
/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = IPA_NODE_REF (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, "    param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, "         ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, "         ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, "        virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, "        AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, "        AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; cloning breaks this property,
     and for C++ decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable;
}
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  profile_count count_sum;
  int n_calls, n_hot_calls, freq_sum;
};

/* Initialize fields of STAT to zeroes.  */

static inline void
init_caller_stats (struct caller_statistics *stats)
{
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->freq_sum = 0;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        if (cs->count.ipa ().initialized_p ())
          stats->count_sum += cs->count.ipa ();
        stats->freq_sum += cs->frequency ();
        stats->n_calls++;
        if (cs->maybe_hot_p ())
          stats->n_hot_calls++;
      }
  return false;
}
/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (max_count > profile_count::zero ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
    constants ()
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}
/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}
/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}
/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}
/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      IPA_NODE_REF (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}
/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}
/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}
/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
/* Meet the current value of the lattice with the value range described by
   the OTHER lattice.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}
/* Meet the current value of the lattice with the value range described by
   *P_VR.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}
/* Meet the current value of the lattice with the value range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}
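
/* For illustration (our example, not from the sources): meeting a lattice
   currently holding the range [1, 3] with an incoming range [5, 7] unions the
   two ranges; since a single value_range cannot represent the exact union,
   the result is the conservative hull [1, 7] and the function returns true
   because the lattice changed.  */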
/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}
/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}
996 ipcp_bits_lattice::set_to_bottom ()
1000 m_lattice_val
= IPA_BITS_VARYING
;
1006 /* Set to constant if it isn't already. Only meant to be called
1007 when switching state from TOP. */
1010 ipcp_bits_lattice::set_to_constant (widest_int value
, widest_int mask
)
1012 gcc_assert (top_p ());
1013 m_lattice_val
= IPA_BITS_CONSTANT
;
1014 m_value
= wi::bit_and (wi::bit_not (mask
), value
);
1019 /* Convert operand to value, mask form. */
1022 ipcp_bits_lattice::get_value_and_mask (tree operand
, widest_int
*valuep
, widest_int
*maskp
)
1024 wide_int
get_nonzero_bits (const_tree
);
1026 if (TREE_CODE (operand
) == INTEGER_CST
)
1028 *valuep
= wi::to_widest (operand
);
/* Meet operation, similar to ccp_lattice_meet: we xor the old and the new
   value, and every bit position where they differ is dropped to varying (set
   in the mask).  Return true if the mask changed.
   This function assumes that the lattice value is in CONSTANT state.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
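
/* A worked example of the meet above (ours, for illustration): with 4-bit
   precision, meeting the CONSTANT state <m_value = 0b0100, m_mask = 0b0011>
   with the incoming pair <value = 0b1100, mask = 0b0000> computes

     m_mask  = (0b0011 | 0b0000) | (0b0100 ^ 0b1100) = 0b1011
     m_value = 0b0100 & ~0b1011                      = 0b0100

   so only bit 2 remains known (with value 1), the mask is not all-ones, and
   the function returns true because the mask grew.  */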
/* Meet the bits lattice with the operand described by
   <value, mask, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is a binary operation or bit_value_unop (other) if code is a unary
   op.  In the case when code is nop_expr, no adjustment is required.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision);
}
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}
/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs && IPA_NODE_REF (cs->caller))
    {
      IPA_NODE_REF (cs->caller)->node_calling_single_call = true;
      return true;
    }
  return false;
}
/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, "  Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;
  if (!disable && node->clone.param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((node->clone.param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || node->clone.param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           "  The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.m_vr = value_range ();
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init ();
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the 2nd operand for a binary operation.  RES_TYPE
   is the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
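
/* For instance (our illustration, not from the sources), applying this to a
   pass-through jump function with opcode PLUS_EXPR, operand 1 and the known
   constant INPUT 3 folds to the interprocedural invariant 4, whereas a result
   that is not a valid interprocedural invariant - e.g. the address of an
   automatic variable - makes this function return NULL_TREE.  */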
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX the index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}
/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
                                   value_range *src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}
/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE the type of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
                            ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
                                       jfunc->m_vr,
                                       NOP_EXPR, parm_type,
                                       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
        = ipcp_get_transformation_summary (cs->caller->inlined_to
                                           ? cs->caller->inlined_to
                                           : cs->caller);
      if (!sum || !sum->m_vr)
        return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
        return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
                         wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
                         (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
        {
          value_range res;

          if (ipa_vr_operation_and_type_effects (&res,
                                                 &srcvr,
                                                 operation, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
      else
        {
          value_range op_res, res;
          tree op = ipa_get_jf_pass_through_operand (jfunc);
          value_range op_vr (op, op);

          range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
          if (ipa_vr_operation_and_type_effects (&res,
                                                 &op_res,
                                                 NOP_EXPR, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
    }
  return vr;
}
/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
                     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
          && aggval->index == index)
        return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}
/* Determine whether ITEM, a jump function for an aggregate part, evaluates to
   a single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
                         struct cgraph_node *node,
                         struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
                       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
        value = info->known_csts[src_idx];
      else
        value = get_clone_agg_value (node, item->value.load_agg.offset,
                                     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
        = ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
        {
          struct ipcp_lattice<tree> *lat = &src_plats->itself;

          if (!lat->is_single_const ())
            return NULL_TREE;

          value = lat->values->value;
        }
      else if (src_plats->aggs
               && !src_plats->aggs_bottom
               && !src_plats->aggs_contain_variable
               && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
        {
          struct ipcp_agg_lattice *aglat;

          for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
            {
              if (aglat->offset > item->value.load_agg.offset)
                break;

              if (aglat->offset == item->value.load_agg.offset)
                {
                  if (aglat->is_single_const ())
                    value = aglat->values->value;
                  break;
                }
            }
        }
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
        return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
                                  value,
                                  item->value.pass_through.operand,
                                  item->type);
}
/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
                              struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
        {
          struct ipa_agg_value value_item;

          value_item.offset = item->offset;
          value_item.value = value;

          agg.items.safe_push (value_item);
        }
    }
  return agg;
}
/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

static void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info = IPA_NODE_REF (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
          || !opt_for_fn (node->decl, optimize))
        continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
        {
          ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

          if (!lat->bottom
              && !lat->contains_variable
              && lat->values_count == 0)
            {
              if (dump_file)
                {
                  symtab->dump (dump_file);
                  fprintf (dump_file, "\nIPA lattices after constant "
                           "propagation, before gcc_unreachable:\n");
                  print_all_lattices (dump_file, true, false);
                }

              gcc_unreachable ();
            }
        }
    }
}
/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
                         ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself or the offset within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
                                 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}
/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   SOURCE and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree source)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = source;
  return val;
}
/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to SOURCE and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = source;
  return val;
}
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records the address of the existing or newly
   added ipcp_value.  UNLIMITED means that the value count should not exceed
   the limit given by PARAM_IPA_CP_VALUE_LIST_SIZE.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
                                  ipcp_value<valtype> *src_val,
                                  int src_idx, HOST_WIDE_INT offset,
                                  ipcp_value<valtype> **val_p,
                                  bool unlimited)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
        if (val_p)
          *val_p = val;

        if (ipa_edge_within_scc (cs))
          {
            ipcp_value_source<valtype> *s;
            for (s = val->sources; s; s = s->next)
              if (s->cs == cs && s->val == src_val)
                break;
            if (s)
              return false;
          }

        val->add_source (cs, src_val, src_idx, offset);
        return false;
      }

  if (!unlimited && values_count == opt_for_fn (cs->caller->decl,
                                                param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
         of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
        {
          while (val->sources)
            {
              ipcp_value_source<valtype> *src = val->sources;
              val->sources = src->next;
              ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
            }
        }
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to the end of the value list, which can reduce
     iterations of the propagation stage for recursive functions.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}
/* Return true if an ipcp_value VAL originated from a parameter value of a
   self-feeding recursive function via some kind of pass-through jump
   function.  */

static bool
self_recursively_generated_p (ipcp_value<tree> *val)
{
  class ipa_node_params *info = NULL;

  for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
    {
      cgraph_edge *cs = src->cs;

      if (!src->val || cs->caller != cs->callee->function_symbol ())
        return false;

      if (src->val == val)
        continue;

      if (!info)
        info = IPA_NODE_REF (cs->caller);

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
                                                                src->index);
      ipcp_lattice<tree> *src_lat;
      ipcp_value<tree> *src_val;

      if (src->offset == -1)
        src_lat = &plats->itself;
      else
        {
          struct ipcp_agg_lattice *src_aglat;

          for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next)
            if (src_aglat->offset == src->offset)
              break;

          if (!src_aglat)
            return false;

          src_lat = src_aglat;
        }

      for (src_val = src_lat->values; src_val; src_val = src_val->next)
        if (src_val == val)
          break;

      if (!src_val)
        return false;
    }

  return true;
}
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL.  If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL.  If the operation is binary, OPND2 is a
   constant value acting as its second operand.  If non-NULL, RES_TYPE is the
   expected type of the result.  */

static tree
get_val_across_arith_op (enum tree_code opcode,
                         tree opnd1_type,
                         tree opnd2,
                         ipcp_value<tree> *src_val,
                         tree res_type)
{
  tree opnd1 = src_val->value;

  /* Skip source values that are incompatible with the specified type.  */
  if (opnd1_type
      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
    return NULL_TREE;

  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
}
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is the expected type for the values in
   SRC_LAT.  OPND2 is a constant value if the transformation is a binary
   operation.  SRC_OFFSET specifies the offset in an aggregate if SRC_LAT
   describes a lattice of a part of the aggregate.  SRC_IDX is the index of
   the source parameter.  RES_TYPE is the value type of the result being
   propagated into.  Return true if DEST_LAT changed.  */

static bool
propagate_vals_across_arith_jfunc (cgraph_edge *cs,
                                   enum tree_code opcode,
                                   tree opnd1_type,
                                   tree opnd2,
                                   ipcp_lattice<tree> *src_lat,
                                   ipcp_lattice<tree> *dest_lat,
                                   HOST_WIDE_INT src_offset,
                                   int src_idx,
                                   tree res_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* Due to circular dependencies, propagating within an SCC through arithmetic
     transformation would create an infinite number of values.  But for
     self-feeding recursive functions, we can allow propagation up to a limited
     count, which enables a simple kind of recursive function versioning.  In
     other scenarios, we just make the lattices bottom.  */
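
  /* An illustrative sketch of the recursive versioning mentioned above (ours,
     not from any testcase): in

       static int
       fact (int n)
       {
         if (n <= 1)
           return 1;
         return n * fact (n - 1);
       }

       ... fact (8) ...

     the self-recursive edge passes N - 1, so with a bounded recursion depth
     the pass can generate the chain of known values 8, 7, 6, ... for N and
     create a clone per value, effectively unrolling the recursion.  */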
  if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
    {
      int i;

      int max_recursive_depth = opt_for_fn(cs->caller->decl,
                                           param_ipa_cp_max_recursive_depth);
      if (src_lat != dest_lat || max_recursive_depth < 1)
        return dest_lat->set_contains_variable ();

      /* No benefit if recursive execution is in low probability.  */
      if (cs->sreal_frequency () * 100
          <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
                                       param_ipa_cp_min_recursive_probability))
        return dest_lat->set_contains_variable ();

      auto_vec<ipcp_value<tree> *, 8> val_seeds;

      for (src_val = src_lat->values; src_val; src_val = src_val->next)
        {
          /* We do not currently use self-recursively generated values as
             propagation sources; this is absolutely conservative but avoids
             explosion of a lattice's value space, especially when one
             recursive function calls another recursive one.  */
          if (self_recursively_generated_p (src_val))
            {
              ipcp_value_source<tree> *s;

              /* If the lattice has already been propagated for the call site,
                 no need to do that again.  */
              for (s = src_val->sources; s; s = s->next)
                if (s->cs == cs)
                  return dest_lat->set_contains_variable ();
            }
          else
            val_seeds.safe_push (src_val);
        }

      gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);

      /* Recursively generate lattice values with a limited count.  */
      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
        {
          for (int j = 1; j < max_recursive_depth; j++)
            {
              tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
                                                     src_val, res_type);
              if (!cstval)
                break;

              ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
                                          src_offset, &src_val, true);
              gcc_checking_assert (src_val);
            }
        }
      ret |= dest_lat->set_contains_variable ();
    }
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
        /* We do not currently use self-recursively generated values as
           propagation sources, otherwise it is easy to make the value space
           of a normal lattice overflow.  */
        if (self_recursively_generated_p (src_val))
          {
            ret |= dest_lat->set_contains_variable ();
            continue;
          }

        tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
                                               src_val, res_type);
        if (cstval)
          ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
                                      src_offset);
        else
          ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}
/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
                                    ipcp_lattice<tree> *src_lat,
                                    ipcp_lattice<tree> *dest_lat, int src_idx,
                                    tree parm_type)
{
  return propagate_vals_across_arith_jfunc (cs,
                                ipa_get_jf_pass_through_operation (jfunc),
                                NULL_TREE,
                                ipa_get_jf_pass_through_operand (jfunc),
                                src_lat, dest_lat, -1, src_idx, parm_type);
}
/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  */

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
                                struct ipa_jump_func *jfunc,
                                ipcp_lattice<tree> *src_lat,
                                ipcp_lattice<tree> *dest_lat, int src_idx)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t)
        ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
        ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      return dest_lat->add_value (val, cs, NULL, 0);
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use it for indirect inlining), we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}
/* Propagate polymorphic call contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX and put the values into
   DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
					ipa_jump_func *jfunc, int idx,
					ipcp_lattice<ipa_polymorphic_call_context>
					*dest_lat)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  if (dest_lat->bottom)
    return false;
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}
    }

 prop_fail:
  if (!added_sth)
    {
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}
/* Propagate bits across jfunc that is associated with
   edge cs and update dest_lattice accordingly.  */

static bool
propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
				     ipa_jump_func *jfunc,
				     ipcp_bits_lattice *dest_lattice)
{
  if (dest_lattice->bottom_p ())
    return false;

  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  class ipa_node_params *callee_info = IPA_NODE_REF (callee);
  tree parm_type = ipa_get_type (callee_info, idx);

  /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
     transform for these cases.  Similarly, we can have bad type mismatches
     with LTO, avoid doing anything with those too.  */
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);

      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example, consider:

	   int f(int x)
	   {
	     g (x & 0xff);
	   }

	 Assume lattice for x is bottom, however we can still propagate
	 result of x & 0xff == 0xff, which gets computed during ccp1 pass
	 and we store it in jump function during analysis stage.  */

      if (src_lats->bits_lattice.bottom_p ()
	  && jfunc->bits)
	return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
					precision);
      else
	return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
					code, operand);
    }

  else if (jfunc->type == IPA_JF_ANCESTOR)
    return dest_lattice->set_to_bottom ();
  else if (jfunc->bits)
    return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
				    precision);
  else
    return dest_lattice->set_to_bottom ();
}
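/* A worked example of the bits lattice meet above (illustrative only): a
   value/mask pair describes known bits, with mask bits set to 1 meaning
   "unknown".  If one caller passes the constant 0x12 (value 0x12, mask 0)
   and another passes x & ~1 for an unknown x (value 0, mask ~1), the meet
   keeps only the bits that are known and equal in both; here that is just
   bit 0, which is 0 in both, so the callee still learns that its parameter
   is always even.  */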
/* Propagate value range across jump function JFUNC that is associated with
   edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
   accordingly.  */

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
      tree operand_type = ipa_get_type (caller_info, src_idx);

      if (src_lats->m_value_range.bottom_p ())
	return dest_lat->set_to_bottom ();

      value_range vr;
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	ipa_vr_operation_and_type_effects (&vr,
					   &src_lats->m_value_range.m_vr,
					   operation, param_type,
					   operand_type);
      /* A crude way to prevent unbounded number of value range updates
	 in SCC components.  We should allow limited number of updates within
	 SCC, too.  */
      else if (!ipa_edge_within_scc (cs))
	{
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  value_range op_vr (op, op);
	  value_range op_res, res;

	  range_fold_binary_expr (&op_res, operation, operand_type,
				  &src_lats->m_value_range.m_vr, &op_vr);
	  ipa_vr_operation_and_type_effects (&vr,
					     &op_res,
					     NOP_EXPR, param_type,
					     operand_type);
	}
      if (!vr.undefined_p () && !vr.varying_p ())
	{
	  if (jfunc->m_vr)
	    {
	      value_range jvr;
	      if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
						     NOP_EXPR,
						     param_type,
						     jfunc->m_vr->type ()))
		vr.intersect (jvr);
	    }
	  return dest_lat->meet_with (&vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  value_range tmpvr (val, val);
	  return dest_lat->meet_with (&tmpvr);
	}
    }

  value_range vr;
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (&vr);
  else
    return dest_lat->set_to_bottom ();
}
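/* A small worked example of the pass-through case above (illustrative, not
   from the sources): for a call g (x + 4) where the caller knows x lies in
   [0, 10], the jump function is a pass-through with a PLUS_EXPR operation
   and constant operand 4.  range_fold_binary_expr combines [0, 10] with the
   singleton range [4, 4] into [4, 14], which is then converted to the type
   of the callee's parameter and met with the destination lattice.  */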
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}
/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.  If
   there are overlaps, turn the whole DEST_PLATS to bottom and return false.
   If any skipped lattices were newly marked as containing variable, set
   *CHANGE to true.  MAX_AGG_ITEMS is the maximum number of lattices.  */

static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}
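/* For illustration: the aggregate lattices of one parameter form a list
   sorted by offset with no overlaps.  For a hypothetical

     struct S { int a; int b; };	with 32-bit int

   known stores to both fields yield the lattices {offset 0, size 32} and
   {offset 32, size 32}.  When a caller only describes the item at offset 32,
   the walk above first marks the skipped offset-0 lattice as containing a
   variable value and then reuses the existing offset-32 lattice; an item
   overlapping an existing one, e.g. offset 16 with size 32, instead sends
   the whole parameter to bottom.  */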
/* Set all AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}
/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   DELTA_OFFSET.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}
/* Determine whether there is anything to propagate FROM SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}
/* Propagate values through ITEM, jump function for a part of an aggregate,
   into corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */

static bool
propagate_aggregate_lattice (struct cgraph_edge *cs,
			     struct ipa_agg_jf_item *item,
			     struct ipcp_agg_lattice *aglat)
{
  class ipa_node_params *caller_info;
  class ipcp_param_lattices *src_plats;
  struct ipcp_lattice<tree> *src_lat;
  HOST_WIDE_INT src_offset;
  int src_idx;
  tree load_type;
  bool ret;

  if (item->jftype == IPA_JF_CONST)
    {
      tree value = item->value.constant;

      gcc_checking_assert (is_gimple_ip_invariant (value));
      return aglat->add_value (value, cs, NULL, 0);
    }

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  caller_info = IPA_NODE_REF (cs->caller);
  src_idx = item->value.pass_through.formal_id;
  src_plats = ipa_get_parm_lattices (caller_info, src_idx);

  if (item->jftype == IPA_JF_PASS_THROUGH)
    {
      load_type = NULL_TREE;
      src_lat = &src_plats->itself;
      src_offset = -1;
    }
  else
    {
      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
      struct ipcp_agg_lattice *src_aglat;

      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
	if (src_aglat->offset >= load_offset)
	  break;

      load_type = item->value.load_agg.type;
      if (!src_aglat
	  || src_aglat->offset > load_offset
	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
	return aglat->set_contains_variable ();

      src_lat = src_aglat;
      src_offset = load_offset;
    }

  if (src_lat->bottom
      || (!ipcp_versionable_function_p (cs->caller)
	  && !src_lat->is_single_const ()))
    return aglat->set_contains_variable ();

  ret = propagate_vals_across_arith_jfunc (cs,
					   item->value.pass_through.operation,
					   load_type,
					   item->value.pass_through.operand,
					   src_lat, aglat,
					   src_offset,
					   src_idx,
					   item->type);

  if (src_lat->contains_variable)
    ret |= aglat->set_contains_variable ();

  return ret;
}
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     class ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	  return ret;
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
      return ret;
    }

  if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}
/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk;
}
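/* As an illustration of why this matters: a this-adjusting thunk generated
   for multiple inheritance adds a fixed offset to the incoming this pointer
   before transferring control to the real function, so the value a call site
   passes as the 0th argument is not the value the callee body observes.
   propagate_constants_across_call below therefore drops all information
   about parameter 0 when such a thunk is on the path.  */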
/* Propagate constants from the caller to the callee of CS.  INFO describes the
   caller.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  class ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  class ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = IPA_NODE_REF (callee);
  if (!callee_info)
    return false;

  args = IPA_EDGE_REF (cs);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;
  if (!args
      || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
      || !opt_for_fn (cs->caller->decl, optimize))
    {
      for (i = 0; i < parms_count; i++)
	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
								 i));
      return ret;
    }
  args_count = ipa_get_cs_argument_count (args);

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}
/* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
   KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination.  The latter
   three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context> known_contexts,
				vec<ipa_agg_value_set> known_aggs,
				struct ipa_agg_replacement_value *agg_reps,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  if (param_index == -1)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      if (ie->indirect_info->agg_contents)
	{
	  if (agg_reps && ie->indirect_info->guaranteed_unmodified)
	    {
	      while (agg_reps)
		{
		  if (agg_reps->index == param_index
		      && agg_reps->offset == ie->indirect_info->offset
		      && agg_reps->by_ref == ie->indirect_info->by_ref)
		    {
		      t = agg_reps->value;
		      break;
		    }
		  agg_reps = agg_reps->next;
		}
	    }
	  if (!t)
	    {
	      struct ipa_agg_value_set *agg;
	      if (known_aggs.length () > (unsigned int) param_index)
		agg = &known_aggs[param_index];
	      else
		agg = NULL;
	      bool from_global_constant;
	      t = ipa_find_agg_cst_for_param (agg,
					      (unsigned) param_index
					      < known_csts.length ()
					      ? known_csts[param_index]
					      : NULL,
					      ie->indirect_info->offset,
					      ie->indirect_info->by_ref,
					      &from_global_constant);
	      if (t
		  && !from_global_constant
		  && !ie->indirect_info->guaranteed_unmodified)
		t = NULL_TREE;
	    }
	}
      else if ((unsigned) param_index < known_csts.length ())
	t = known_csts[param_index];

      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  /* Try to work out value of virtual table pointer value in replacements.  */
  if (!t && agg_reps && !ie->indirect_info->by_ref)
    {
      while (agg_reps)
	{
	  if (agg_reps->index == param_index
	      && agg_reps->offset == ie->indirect_info->offset
	      && agg_reps->by_ref)
	    {
	      t = agg_reps->value;
	      break;
	    }
	  agg_reps = agg_reps->next;
	}
    }

  /* Try to work out value of virtual table pointer value in known
     aggregate values.  */
  if (!t && known_aggs.length () > (unsigned int) param_index
      && !ie->indirect_info->by_ref)
    {
      struct ipa_agg_value_set *agg = &known_aggs[param_index];
      t = ipa_find_agg_cst_for_param (agg,
				      (unsigned) param_index
				      < known_csts.length ()
				      ? known_csts[param_index] : NULL,
				      ie->indirect_info->offset, true);
    }

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						      vtable, offset, &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
		       (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean determining
   whether the discovered target is only a speculative guess.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}

/* The same functionality as above overloaded for ipa_auto_call_arg_values.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      ipa_auto_call_arg_values *avals,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
					 avals->m_known_contexts,
					 avals->m_known_aggs,
					 NULL, speculative);
}
/* Calculate devirtualization time bonus for NODE, assuming we know information
   about arguments stored in AVALS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      target = ipa_get_indirect_edge_target (ie, avals, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}
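/* Worked example with made-up numbers: if param_max_inline_insns_auto is 40
   and a devirtualized target has estimated size 9 (<= 40 / 4), the bonus is
   31 for a guaranteed target but only 15 (i.e. 31 / 2) when the
   devirtualization is merely speculative.  */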
/* Return time bonus incurred because of hints stored in ESTIMATES.  */

static int
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
  int result = 0;
  ipa_hints hints = estimates.hints;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

  if (hints & INLINE_HINT_loop_iterations)
    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();

  if (hints & INLINE_HINT_loop_stride)
    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();

  return result;
}
/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline int64_t
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       int64_t evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
      / 100;

  return evaluation;
}
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQUENCIES.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
			    int freq_sum, profile_count count_sum, int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  class ipa_node_params *info = IPA_NODE_REF (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (max_count > profile_count::zero ())
    {
      int factor = RDIV (count_sum.probability_in
				 (max_count).to_reg_br_prob_base ()
			 * 1000, REG_BR_PROB_BASE);
      int64_t evaluation = (((int64_t) time_benefit * factor)
			    / size_cost);
      evaluation = incorporate_penalties (node, info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
		   "size: %i, count_sum: ", time_benefit, size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
		   ", threshold: %i\n",
		   info->node_within_scc
		     ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation, eval_threshold);
	}

      return evaluation >= eval_threshold;
    }
  else
    {
      int64_t evaluation = (((int64_t) time_benefit * freq_sum)
			    / size_cost);
      evaluation = incorporate_penalties (node, info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
		 "size: %i, freq_sum: %i%s%s) -> evaluation: "
		 "%" PRId64 ", threshold: %i\n",
		 time_benefit, size_cost, freq_sum,
		 info->node_within_scc
		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation, eval_threshold);

      return evaluation >= eval_threshold;
    }
}
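/* A worked example of the profile-based branch above (made-up numbers): with
   time_benefit 200, size_cost 20 and count_sum equal to half of max_count,
   to_reg_br_prob_base () yields about REG_BR_PROB_BASE / 2, so factor is
   roughly 500 and the base evaluation is 200 * 500 / 20 = 5000.  If the node
   is in a (non-self) SCC and param_ipa_cp_recursion_penalty is 40,
   incorporate_penalties scales this to 5000 * 60 / 100 = 3000, which is then
   compared against param_ipa_cp_eval_threshold.  */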
/* Return all context independent values from aggregate lattices in PLATS in a
   vector.  Return NULL if there are none.  */

static vec<ipa_agg_value>
context_independent_aggregate_values (class ipcp_param_lattices *plats)
{
  vec<ipa_agg_value> res = vNULL;

  if (plats->aggs_bottom
      || plats->aggs_contain_variable
      || plats->aggs_count == 0)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs;
       aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value item;
	item.offset = aglat->offset;
	item.value = aglat->values->value;
	res.safe_push (item);
      }
  return res;
}
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
				   ipa_auto_call_arg_values *avals,
				   bool calculate_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);
  if (calculate_aggs)
    avals->m_known_aggs.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals->m_known_vals[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account known context as reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
	{
	  vec<ipa_agg_value> agg_items;
	  struct ipa_agg_value_set *agg;

	  agg_items = context_independent_aggregate_values (plats);
	  agg = &avals->m_known_aggs[i];
	  agg->items = agg_items;
	  agg->by_ref = plats->aggs_by_ref;
	  ret |= !agg_items.is_empty ();
	}
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
			       ipa_auto_call_arg_values *avals,
			       int removable_params_cost, int est_move_cost,
			       ipcp_value_base *val)
{
  int time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);
  sreal time_delta = estimates.nonspecialized_time - estimates.time;
  if (time_delta > 65535)
    time_delta = 65535;

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = time_delta.to_int ()
      + devirtualization_time_bonus (node, avals)
      + hint_time_bonus (node, estimates)
      + removable_params_cost + est_move_cost;

  int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
/* Get the overall limit of growth based on parameters extracted from growth.
   It does not really make sense to mix functions with different overall growth
   limits, but it is possible, and if it happens, we do not want to select one
   limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}
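/* For example (hypothetical numbers): with orig_overall_size 100000,
   param_ipa_cp_large_unit_insns 16000 and param_ipa_cp_unit_growth 10, the
   returned limit is 100000 + 100000 * 10 / 100 + 1 = 110001 weighted
   instructions.  */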
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  if (always_const || devirt_bonus
      || (removable_params_cost && node->can_change_signature))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, "     Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node,
					   MIN ((time).to_int (), 65536),
					   stats.freq_sum, stats.count_sum,
					   size))
	{
	  if (size + overall_size <= get_max_overall_size (node))
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, "     Decided to specialize for all "
			 "known contexts, growth (to %li) deemed "
			 "beneficial.\n", overall_size);
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "   Not cloning for all contexts because "
		     "maximum unit size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");

    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || avals.m_known_vals[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  avals.m_known_vals[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %i, size: %i\n",
		       val->local_time_benefit, val->local_size_cost);
	    }
	}
      avals.m_known_vals[i] = NULL_TREE;
    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !avals.m_known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  avals.m_known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, &avals, removable_params_cost,
					 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %i, size: %i\n",
		       val->local_time_benefit, val->local_size_cost);
	    }
	}
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      ipa_agg_value_set *agg = &avals.m_known_aggs[i];
      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is in known_aggs.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  for (val = aglat->values; val; val = val->next)
	    {
	      struct ipa_agg_value item;

	      item.offset = aglat->offset;
	      item.value = val->value;
	      agg->items.safe_push (item);

	      perform_estimation_of_a_value (node, &avals,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, i);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %i, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit, val->local_size_cost);
		}

	      agg->items.pop ();
	    }
	}
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
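/* The above is, in essence, Tarjan's strongly connected components algorithm
   run over the value dependency graph: the dfs/low_link pair discovers
   components of mutually dependent values, each popped component is strung
   together through scc_next, and components are prepended to values_topo in
   an order that propagate_effects below can walk topologically.  */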
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    if (opt_for_fn (v->decl, flag_ipa_cp)
		&& opt_for_fn (v->decl, optimize))
	      push_node_to_stack (topo, v);
	    /* When V is not optimized, we can not push it to stack, but
	       still we need to set all its callees lattices to bottom.  */
	    else
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		propagate_constants_across_call (cs);
	  }

      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;
	  class ipa_node_params *info = NULL;
	  bool self_scc = true;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		cgraph_node *callee = cs->callee->function_symbol ();

		if (v != callee)
		  self_scc = false;

		if (!info)
		  {
		    info = IPA_NODE_REF (v);
		    info->node_within_scc = true;
		  }

		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, callee);
	      }

	  if (info)
	    info->node_is_self_scc = self_scc;

	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ()
	    && opt_for_fn (v->decl, flag_ipa_cp)
	    && opt_for_fn (v->decl, optimize))
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}
/* Return the sum of A and B if none of them is bigger than INT_MAX/2, return
   the bigger one otherwise.  */

static int
safe_add (int a, int b)
{
  if (a > INT_MAX/2 || b > INT_MAX/2)
    return a > b ? a : b;
  else
    return a + b;
}
/* Propagate the estimated effects of individual values along the topological
   order from the dependent values to those they depend on.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      int time = 0, size = 0;

      for (val = base; val; val = val->scc_next)
	{
	  time = safe_add (time,
			   val->local_time_benefit + val->prop_time_benefit);
	  size = safe_add (size, val->local_size_cost + val->prop_size_cost);
	}

      for (val = base; val; val = val->scc_next)
	for (src = val->sources; src; src = src->next)
	  if (src->val
	      && src->cs->maybe_hot_p ())
	    {
	      src->val->prop_time_benefit = safe_add (time,
						src->val->prop_time_benefit);
	      src->val->prop_size_cost = safe_add (size,
						   src->val->prop_size_cost);
	    }
    }
}
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  max_count = profile_count::uninitialized ();

  FOR_EACH_DEFINED_FUNCTION (node)
  {
    if (node->has_gimple_body_p ()
	&& opt_for_fn (node->decl, flag_ipa_cp)
	&& opt_for_fn (node->decl, optimize))
      {
	class ipa_node_params *info = IPA_NODE_REF (node);
	determine_versionability (node, info);

	unsigned nlattices = ipa_get_param_count (info);
	void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
	info->lattices = new (chunk) ipcp_param_lattices[nlattices];
	initialize_node_lattices (node);
      }
    ipa_size_summary *s = ipa_size_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    max_count = max_count.max (node->count.ipa ());
  }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.  */

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				struct ipa_agg_replacement_value *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      next_ie = ie->next_callee;
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       vNULL, aggvals, &speculative);
      if (target)
	{
	  bool agg_contents = ie->indirect_info->agg_contents;
	  bool polymorphic = ie->indirect_info->polymorphic;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  if (cs && !agg_contents && !polymorphic)
	    {
	      class ipa_node_params *info = IPA_NODE_REF (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE)
		{
		  struct ipa_ref *to_del;

		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "     controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "       and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}
class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  */

class edge_clone_summary
{
public:
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
    {
      m_initialize_when_cloning = true;
    }

  virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
			  edge_clone_summary *src_data,
			  edge_clone_summary *dst_data);
};

/* Edge duplication hook.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
				 edge_clone_summary *src_data,
				 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */

static bool
calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
					     bool allow_recursion_to_clone)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);

  if (availability <= AVAIL_INTERPOSABLE)
    return false;
  if (callee == dest)
    return true;
  if (!allow_recursion_to_clone && cs->caller == callee)
    return false;

  class ipa_node_params *info = IPA_NODE_REF (callee);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
      || caller_info->node_dead)
    return false;

  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      tree t;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else
	t = get_clone_agg_value (cs->caller, src->offset, src->index);
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      if (src->val == dest_val)
	return true;

      struct ipcp_agg_lattice *aglat;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}
/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
			    ipcp_value_source<ipa_polymorphic_call_context> *src,
			    cgraph_node *dest,
			    ipcp_value<ipa_polymorphic_call_context> *)
{
  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);

  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
				  caller_info->known_contexts[src->index]);

  class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
							    src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
				plats->ctxlat.values->value);
}
/* Get the next clone in the linked list of clones of an edge.  */

static inline struct cgraph_edge *
get_next_cgraph_edge_clone (struct cgraph_edge *cs)
{
  edge_clone_summary *s = edge_clone_summaries->get (cs);
  return s != NULL ? s->next_clone : NULL;
}
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequency and their number into *FREQUENCY and
   *CALLER_COUNT respectively.  */

template <typename valtype>
static bool
get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
				int *freq_sum,
				profile_count *count_sum, int *caller_count)
{
  ipcp_value_source<valtype> *src;
  int freq = 0, count = 0;
  profile_count cnt = profile_count::zero ();
  bool hot = false;
  bool non_self_recursive = false;

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    {
	      count++;
	      freq += cs->frequency ();
	      if (cs->count.ipa ().initialized_p ())
		cnt += cs->count.ipa ();
	      hot |= cs->maybe_hot_p ();
	      if (cs->caller != dest)
		non_self_recursive = true;
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  /* If the only edges bringing a value are self-recursive ones, do not bother
     evaluating it.  */
  if (!non_self_recursive)
    return false;

  *freq_sum = freq;
  *count_sum = cnt;
  *caller_count = count;

  if (!hot && IPA_NODE_REF (dest)->node_within_scc)
    {
      struct cgraph_edge *cs;

      /* Cold non-SCC source edge could trigger hot recursive execution of
	 function.  Consider the case as hot and rely on following cost model
	 computation to further select right one.  */
      for (cs = dest->callers; cs; cs = cs->next_caller)
	if (cs->caller == dest && cs->maybe_hot_p ())
	  return true;
    }

  return hot;
}
/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
   to let a non-self-recursive caller be the first element.  Thus, we can
   simplify intersecting operations on values that arrive from all of these
   callers, especially when there exists a self-recursive call.  Return true
   if this kind of adjustment is possible.  */

static bool
adjust_callers_for_value_intersection (vec<cgraph_edge *> callers,
				       cgraph_node *node)
{
  for (unsigned i = 0; i < callers.length (); i++)
    {
      cgraph_edge *cs = callers[i];

      if (cs->caller != node)
	{
	  if (i > 0)
	    {
	      callers[i] = callers[0];
	      callers[0] = cs;
	    }
	  return true;
	}
    }
  return false;
}

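/* For example, if CALLERS is {self-recursive edge, edge from A,
   self-recursive edge}, the edge from A is swapped into the first slot, so
   that the intersection of incoming values is seeded from a non-recursive
   caller rather than from an edge that merely feeds the node its own
   values.  */
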
/* Return a vector of incoming edges that do bring value VAL to node DEST.  It
   is assumed their number is known and equal to CALLER_COUNT.  */

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
			int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    ret.quick_push (cs);
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (caller_count > 1)
    adjust_callers_for_value_intersection (ret, dest);

  return ret;
}

/* Construct a replacement map for a known VALUE for formal parameter
   PARM_NUM.  Return it or NULL if for some reason it cannot be created.  */

static struct ipa_replace_map *
get_replacement_map (class ipa_node_params *info, tree value, int parm_num)
{
  struct ipa_replace_map *replace_map;

  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, "    replacing ");
      ipa_dump_param (dump_file, info, parm_num);
      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);
      fprintf (dump_file, "\n");
    }
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  return replace_map;
}

/* Dump new profiling counts.  */

static void
dump_profile_updates (struct cgraph_node *orig_node,
		      struct cgraph_node *new_node)
{
  struct cgraph_edge *cs;

  fprintf (dump_file, "    setting count of the specialized node to ");
  new_node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cs = new_node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "      edge to %s has count ",
	       cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "    setting count of the original node to ");
  orig_node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, "      edge to %s is left with ",
	       cs->callee->dump_name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}

/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct cgraph_edge *cs;
  struct caller_statistics stats;
  profile_count new_sum, orig_sum;
  profile_count remainder, orig_node_count = orig_node->count;
  profile_count orig_new_node_count = new_node->count;

  if (!(orig_node_count.ipa () > profile_count::zero ()))
    return;

  init_caller_stats (&stats);
  orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						 false);
  orig_sum = stats.count_sum;
  init_caller_stats (&stats);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
  new_sum = stats.count_sum;

  if (orig_node_count < orig_sum + new_sum)
    {
      if (dump_file)
	{
	  fprintf (dump_file, "    Problem: node %s has too low count ",
		   orig_node->dump_name ());
	  orig_node_count.dump (dump_file);
	  fprintf (dump_file, "while the sum of incoming count is ");
	  (orig_sum + new_sum).dump (dump_file);
	  fprintf (dump_file, "\n");
	}

      orig_node_count = (orig_sum + new_sum).apply_scale (12, 10);
      if (dump_file)
	{
	  fprintf (dump_file, "      proceeding by pretending it was ");
	  orig_node_count.dump (dump_file);
	  fprintf (dump_file, "\n");
	}
    }

  remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
						      - new_sum.ipa ());

  /* With partial train run we do not want to assume that original's
     count is zero whenever we redirect all executed edges to clone.
     Simply drop profile to local one in this case.  */
  if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && flag_profile_partial_training)
    remainder = remainder.guessed_local ();

  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  new_node->count = new_sum;
  orig_node->count = remainder;

  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
  for (cs = new_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);

  profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);
  for (cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);

  if (dump_file)
    dump_profile_updates (orig_node, new_node);
}

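/* As an illustration of the arithmetic above: if ORIG_NODE had an IPA count
   of 100 and the callers now calling NEW_NODE contribute a combined count of
   60, NEW_NODE receives a count of 60 and ORIG_NODE keeps the remaining 40;
   the outgoing edges of ORIG_NODE are then scaled by 40/100 and those of
   NEW_NODE by the ratio of its new count to the count it was given when it
   was created.  */
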
/* Update the respective profile of specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
			    struct cgraph_node *orig_node,
			    profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count;

  if (dump_file)
    {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  gcc_assert (orig_node_count >= redirected_sum);

  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count -= redirected_sum;

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
						 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    dump_profile_updates (orig_node, new_node);
}

/* Return true if we would like to remove a parameter from NODE when cloning
   it with KNOWN_CSTS scalar constants.  */

static bool
want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
{
  auto_vec<bool, 16> surviving;
  bool filled_vec = false;
  ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      if (!known_csts[i] && ipa_is_param_used (info, i))
	continue;

      if (!filled_vec)
	{
	  if (!node->clone.param_adjustments)
	    return true;
	  node->clone.param_adjustments->get_surviving_params (&surviving);
	  filled_vec = true;
	}
      if (surviving.length () > (unsigned) i && surviving[i])
	return true;
    }
  return false;
}

/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 struct ipa_agg_replacement_value *aggvals,
			 vec<cgraph_edge *> callers)
{
  class ipa_node_params *new_info, *info = IPA_NODE_REF (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct ipa_agg_replacement_value *av;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  ipa_param_adjustments *old_adjustments = node->clone.param_adjustments;
  ipa_param_adjustments *new_adjustments;
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc<ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  replace_trees = vec_safe_copy (node->clone.tree_map);
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (t)
	{
	  struct ipa_replace_map *replace_map;

	  gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
	  replace_map = get_replacement_map (info, t, i);
	  if (replace_map)
	    vec_safe_push (replace_trees, replace_map);
	}
    }
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller
			      != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (av = aggvals; av; av = av->next)
    new_node->maybe_create_reference (av->value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n",
	       new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, "     known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	ipa_dump_agg_replacement_values (dump_file, aggvals);
    }
  ipa_check_create_node_params ();
  update_profiling_info (node, new_node);
  new_info = IPA_NODE_REF (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
				  aggvals);

  callers.release ();
  return new_node;
}

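/* The "constprop" suffix passed to create_virtual_clone above, combined with
   the per-name counter, is what produces the familiar names of IPA-CP clones
   in dumps and symbol tables; e.g. the first clone of foo is emitted as
   foo.constprop.0.  */
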
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
			       bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && IPA_NODE_REF (cs->caller)
      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}

/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check if JFUNC is a simple no-operation pass-through.  */

static bool
self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
				   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && IPA_NODE_REF (cs->caller)
      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}

/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> known_csts,
					    vec<cgraph_edge *> callers)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  if (!IPA_EDGE_REF (cs)
	      || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

		fn (int i)
		{
		  fn (i & 1);
		}

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      gcc_assert (newval);
	      t = ipa_get_jf_arith_result (
			ipa_get_jf_pass_through_operation (jump_func),
			newval,
			ipa_get_jf_pass_through_operand (jump_func),
			type);
	    }
	  else
	    t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func,
				      type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}

/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      vec<cgraph_edge *> callers)
{
  ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  if (!IPA_EDGE_REF (cs)
	      || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs),
							i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (IPA_NODE_REF (cs->caller), cs, i,
					jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}

/* Go through PLATS and create a vector of values consisting of values and
   offsets (minus OFFSET) of lattices that contain only a single value.  */

static vec<ipa_agg_value>
copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
{
  vec<ipa_agg_value> res = vNULL;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_value ti;
	ti.offset = aglat->offset - offset;
	ti.value = aglat->values->value;
	res.safe_push (ti);
      }
  return res;
}

/* Intersect all values in INTER with single value lattices in PLATS (while
   subtracting OFFSET).  */

static void
intersect_with_plats (class ipcp_param_lattices *plats,
		      vec<ipa_agg_value> *inter,
		      HOST_WIDE_INT offset)
{
  struct ipcp_agg_lattice *aglat;
  struct ipa_agg_value *item;
  int k;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    {
      inter->release ();
      return;
    }

  aglat = plats->aggs;
  FOR_EACH_VEC_ELT (*inter, k, item)
    {
      bool found = false;
      if (!item->value)
	continue;
      while (aglat)
	{
	  if (aglat->offset - offset > item->offset)
	    break;
	  if (aglat->offset - offset == item->offset)
	    {
	      if (aglat->is_single_const ())
		{
		  tree value = aglat->values->value;

		  if (values_equal_for_ipcp_p (item->value, value))
		    found = true;
		}
	      break;
	    }
	  aglat = aglat->next;
	}
      if (!found)
	item->value = NULL_TREE;
    }
}

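/* Note that this function and intersect_with_agg_replacements below implement
   the intersection by clearing the value of items that are not confirmed,
   rather than by shrinking the vector, so consumers of the result check
   item->value for NULL_TREE.  */
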
/* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
   vector result while subtracting OFFSET from the individual value
   offsets.  */

static vec<ipa_agg_value>
agg_replacements_to_vector (struct cgraph_node *node, int index,
			    HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *av;
  vec<ipa_agg_value> res = vNULL;

  for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
    if (av->index == index
	&& (av->offset - offset) >= 0)
      {
	struct ipa_agg_value item;
	gcc_checking_assert (av->value);
	item.offset = av->offset - offset;
	item.value = av->value;
	res.safe_push (item);
      }

  return res;
}

/* Intersect all values in INTER with those that we have already scheduled to
   be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
   (while subtracting OFFSET).  */

static void
intersect_with_agg_replacements (struct cgraph_node *node, int index,
				 vec<ipa_agg_value> *inter,
				 HOST_WIDE_INT offset)
{
  struct ipa_agg_replacement_value *srcvals;
  struct ipa_agg_value *item;
  int i;

  srcvals = ipa_get_agg_replacements_for_node (node);
  if (!srcvals)
    {
      inter->release ();
      return;
    }

  FOR_EACH_VEC_ELT (*inter, i, item)
    {
      struct ipa_agg_replacement_value *av;
      bool found = false;
      if (!item->value)
	continue;
      for (av = srcvals; av; av = av->next)
	{
	  gcc_checking_assert (av->value);
	  if (av->index == index
	      && av->offset - offset == item->offset)
	    {
	      if (values_equal_for_ipcp_p (item->value, av->value))
		found = true;
	      break;
	    }
	}
      if (!found)
	item->value = NULL_TREE;
    }
}

/* Intersect values in INTER with aggregate values that come along edge CS to
   parameter number INDEX and return it.  If INTER does not actually exist
   yet, copy all incoming values to it.  If we determine we ended up with no
   values whatsoever, return a released vector.  */

static vec<ipa_agg_value>
intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
				vec<ipa_agg_value> inter)
{
  struct ipa_jump_func *jfunc;
  jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index);
  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  class ipcp_param_lattices *orig_plats;
	  orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node),
					      src_idx);
	  if (agg_pass_through_permissible_p (orig_plats, jfunc))
	    {
	      if (!inter.exists ())
		inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
	      else
		intersect_with_agg_replacements (cs->caller, src_idx,
						 &inter, 0);
	      return inter;
	    }
	}
      else
	{
	  class ipcp_param_lattices *src_plats;
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  if (agg_pass_through_permissible_p (src_plats, jfunc))
	    {
	      /* Currently we do not produce clobber aggregate jump
		 functions, adjust when we do.  */
	      gcc_checking_assert (!jfunc->agg.items);
	      if (!inter.exists ())
		inter = copy_plats_to_inter (src_plats, 0);
	      else
		intersect_with_plats (src_plats, &inter, 0);
	      return inter;
	    }
	}
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;
      HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);

      if (caller_info->ipcp_orig_node)
	{
	  if (!inter.exists ())
	    inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
	  else
	    intersect_with_agg_replacements (cs->caller, src_idx, &inter,
					     delta);
	}
      else
	{
	  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
	  /* Currently we do not produce clobber aggregate jump
	     functions, adjust when we do.  */
	  gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
	  if (!inter.exists ())
	    inter = copy_plats_to_inter (src_plats, delta);
	  else
	    intersect_with_plats (src_plats, &inter, delta);
	}
      return inter;
    }

  if (jfunc->agg.items)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      struct ipa_agg_value *item;
      int k;

      if (!inter.exists ())
	for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
	  {
	    struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
	    tree value = ipa_agg_value_from_node (caller_info, cs->caller,
						  agg_item);
	    if (value)
	      {
		struct ipa_agg_value agg_value;

		agg_value.value = value;
		agg_value.offset = agg_item->offset;
		inter.safe_push (agg_value);
	      }
	  }
      else
	FOR_EACH_VEC_ELT (inter, k, item)
	  {
	    int l = 0;
	    bool found = false;

	    if (!item->value)
	      continue;

	    while ((unsigned) l < jfunc->agg.items->length ())
	      {
		struct ipa_agg_jf_item *ti;
		ti = &(*jfunc->agg.items)[l];
		if (ti->offset > item->offset)
		  break;
		if (ti->offset == item->offset)
		  {
		    tree value;

		    /* Besides simple pass-through aggregate jump function,
		       arithmetic aggregate jump function could also bring
		       same aggregate value as parameter passed-in for
		       self-feeding recursive call.  For example,

			 fn (int *i)
			 {
			   int j = *i & 1;
			   fn (&j);
			 }

		       Given that *i is 0, recursive propagation via (*i & 1)
		       also gets 0.  */
		    if (self_recursive_agg_pass_through_p (cs, ti, index,
							   false))
		      value = ipa_get_jf_arith_result (
				ti->value.pass_through.operation,
				item->value,
				ti->value.pass_through.operand,
				ti->type);
		    else
		      value = ipa_agg_value_from_node (caller_info,
						       cs->caller, ti);

		    if (value && values_equal_for_ipcp_p (item->value, value))
		      found = true;
		    break;
		  }
		l++;
	      }
	    if (!found)
	      item->value = NULL;
	  }
    }
  else
    {
      inter.release ();
      return vNULL;
    }
  return inter;
}

/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  */

static struct ipa_agg_replacement_value *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  vec<cgraph_edge *> callers)
{
  class ipa_node_params *dest_info = IPA_NODE_REF (node);
  struct ipa_agg_replacement_value *res;
  struct ipa_agg_replacement_value **tail = &res;
  struct cgraph_edge *cs;
  int i, j, count = ipa_get_param_count (dest_info);

  FOR_EACH_VEC_ELT (callers, j, cs)
    {
      if (!IPA_EDGE_REF (cs))
	{
	  count = 0;
	  break;
	}
      int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs));
      if (c < count)
	count = c;
    }

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      vec<ipa_agg_value> inter = vNULL;
      struct ipa_agg_value *item;
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
      int j;

      /* Among other things, the following check should deal with all by_ref
	 mismatches.  */
      if (plats->aggs_bottom)
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jfunc
	    = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
	  if (self_recursive_pass_through_p (cs, jfunc, i)
	      && (!plats->aggs_by_ref
		  || ipa_get_jf_pass_through_agg_preserved (jfunc)))
	    continue;
	  inter = intersect_aggregates_with_edge (cs, i, inter);

	  if (!inter.exists ())
	    goto next_param;
	}

      FOR_EACH_VEC_ELT (inter, j, item)
	{
	  struct ipa_agg_replacement_value *v;

	  if (!item->value)
	    continue;

	  v = ggc_alloc<ipa_agg_replacement_value> ();
	  v->index = i;
	  v->offset = item->offset;
	  v->value = item->value;
	  v->by_ref = plats->aggs_by_ref;
	  *tail = v;
	  tail = &v->next;
	}

    next_param:
      if (inter.exists ())
	inter.release ();
    }
  *tail = NULL;
  return res;
}

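/* The list built above is threaded through the tail pointer, so the
   replacements appear in order of increasing parameter index, e.g.
   (index 0, offset 0, value 1) -> (index 2, offset 32, value 7) -> NULL for
   a node whose first and third parameters have known aggregate contents.  */
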
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  class ipa_node_params *dest_info = IPA_NODE_REF (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = IPA_NODE_REF (cs->caller);
  args = IPA_EDGE_REF (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
	continue;

      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}

/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  class ipa_node_params *orig_node_info;
  struct ipa_agg_replacement_value *aggval;
  int i, ec, count;

  aggval = ipa_get_agg_replacements_for_node (node);
  if (!aggval)
    return true;

  count = ipa_get_param_count (IPA_NODE_REF (node));
  ec = ipa_get_cs_argument_count (IPA_EDGE_REF (cs));
  if (ec < count)
    for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
      if (av->index >= ec)
	return false;

  orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats;
      bool interesting = false;
      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (av->index == i)
	  {
	    interesting = true;
	    break;
	  }
      if (!interesting)
	continue;

      plats = ipa_get_parm_lattices (orig_node_info, i);
      if (plats->aggs_bottom)
	return false;

      vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i,
								  vNULL);
      if (!values.exists ())
	return false;

      for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (av->index == i)
	  {
	    struct ipa_agg_value *item;
	    int j;
	    bool found = false;
	    FOR_EACH_VEC_ELT (values, j, item)
	      if (item->value
		  && item->offset == av->offset
		  && values_equal_for_ipcp_p (item->value, av->value))
		{
		  found = true;
		  break;
		}
	    if (!found)
	      {
		values.release ();
		return false;
	      }
	  }
      values.release ();
    }
  return true;
}

/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform
   to all other criteria such that they can be redirected to the special node.
   This function can therefore redirect the final edge in an SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs,
							   val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}

/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}

/* Return a copy of KNOWN_CONTEXTS if it is not empty, otherwise return
   vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (vec<ipa_polymorphic_call_context> known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}

/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

static bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  while (aggvals)
    {
      if (aggvals->index == index
	  && aggvals->offset == offset
	  && values_equal_for_ipcp_p (aggvals->value, value))
	return true;
      aggvals = aggvals->next;
    }
  return false;
}

/* Return true if offset is minus one because source of a polymorphic context
   cannot be an aggregate value.  */

static bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
			       int, HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}

/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals)
{
  struct ipa_agg_replacement_value *aggvals;
  int freq_sum, caller_count;
  profile_count count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum,
					    &caller_count))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, IPA_NODE_REF (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node,
				      safe_add (val->local_time_benefit,
						val->prop_time_benefit),
				      freq_sum, count_sum,
				      safe_add (val->local_size_cost,
						val->prop_size_cost)))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val,
				index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  aggvals = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}

/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals);
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been
		   considered for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val,
					   &avals);
	}

      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals);
	}

      info = IPA_NODE_REF (node);
    }

  if (info->do_clone_for_all_contexts)
    {
      struct cgraph_node *clone;
      vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  callers.release ();
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      ipa_agg_replacement_value *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info = IPA_NODE_REF (node);
      info->do_clone_for_all_contexts = false;
      IPA_NODE_REF (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}

/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = IPA_NODE_REF (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}

/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs)
	     && (!IPA_NODE_REF (cs->caller) /* Unoptimized caller.  */
		 || !IPA_NODE_REF (cs->caller)->node_dead))
      return true;
  return false;
}

/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local
	&& IPA_NODE_REF (v)
	&& !v->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      IPA_NODE_REF (v)->node_dead = 1;

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (IPA_NODE_REF (v) && !IPA_NODE_REF (v)->node_dead)
      spread_undeadness (v);

  if (dump_file && (dump_flags & TDF_DETAILS))
    for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
      if (IPA_NODE_REF (v) && IPA_NODE_REF (v)->node_dead)
	fprintf (dump_file, "  Marking node as dead: %s.\n",
		 v->dump_name ());
}

/* The decision stage.  Iterate over the topological order of call graph
   nodes TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}

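/* Note the fixed-point iteration above: creating a clone of one node in an
   SCC can make new constants arrive at another node of the same SCC, so
   decide_whether_version_node is re-run over the whole component until it no
   longer creates anything.  */
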
/* Look up all the bits information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_bits_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = IPA_NODE_REF (node);
      bool dumped_sth = false;
      bool found_useful_result = false;

      if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp: disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = IPA_NODE_REF (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}

      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->bits, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_bits *jfbits;

	  if (plats->bits_lattice.constant_p ())
	    {
	      jfbits
		= ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
					      plats->bits_lattice.get_mask ());
	      if (!dbg_cnt (ipa_cp_bits))
		jfbits = NULL;
	    }
	  else
	    jfbits = NULL;

	  ts->bits->quick_push (jfbits);
	  if (!dump_file || !jfbits)
	    continue;
	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  print_hex (jfbits->value, dump_file);
	  fprintf (dump_file, ", mask = ");
	  print_hex (jfbits->mask, dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}

/* Look up all VR information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = IPA_NODE_REF (node);
      bool found_useful_result = false;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagate; -fipa-vrp: disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = IPA_NODE_REF (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_vr vr;

	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      vr.known = true;
	      vr.type = plats->m_value_range.m_vr.kind ();
	      vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
	      vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
	    }
	  else
	    {
	      vr.known = false;
	      vr.type = VR_VARYING;
	      vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
	    }
	  ts->m_vr->quick_push (vr);
	}
    }
}

/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of bits propagation.  */
  ipcp_store_bits_results ();
  /* Store results of value range propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}

/* Initialization and computation of IPCP data structures.  This is the
   initial intraprocedural analysis of functions, which gathers information
   to be propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}

/* Write ipcp summary for nodes in SET.  */

static void
ipcp_write_summary (void)
{
  ipa_prop_write_jump_functions ();
}

/* Read ipcp summary.  */

static void
ipcp_read_summary (void)
{
  ipa_prop_read_jump_functions ();
}

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      ipcp_write_summary, /* write_summary */
		      ipcp_read_summary, /* read_summary */
		      ipcp_write_transformation_summaries, /*
		      write_optimization_summary */
		      ipcp_read_transformation_summaries, /*
		      read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* FIXME: We should remove the optimize check after we ensure we never
	 run IPA passes when not optimizing.  */
      return (flag_ipa_cp && optimize) || in_lto_p;
    }

  virtual unsigned int execute (function *) { return ipcp_driver (); }

}; // class pass_ipa_cp

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}

/* Reset all state within ipa-cp.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_c_finalize (void)
{
  max_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}