/* Interprocedural constant propagation
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and growth
      in code size is deemed good.

   The algorithm also propagates types and attempts to perform type-based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions
   and their callees, and finally decides what specialized extra versions
   should be created.  In the third, the special versions materialize and
   appropriate calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural
   Constant Propagation", by David Callahan, Keith D Cooper, Ken Kennedy,
   Linda Torczon, Comp86, pg 152-161 and "A Methodology for Procedure Cloning"
   by Keith D Cooper, Mary W. Hall, and Ken Kennedy.

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
		  argument, possibly with an operation performed on it.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together
   with the data structures that represent them and methods of accessing
   them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than Callahan's original paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally,
   we propagate cumulative information about these effects from dependent
   values to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just
   for some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third stage - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in
   cgraphunit.cc and tree-inline.cc) according to instructions inserted to the
   call graph by the second stage.  */
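/* As a purely illustrative example (invented here, not taken from the code
   below), consider:

     static int
     add (int a, int b)
     {
       return a + b;
     }

     int
     f (void)
     {
       return add (7, 8);
     }

   Every call of add passes the constants 7 and 8, so IPA-CP may replace the
   parameters with the known constants (or, when that only holds in some
   calling contexts, create a specialized clone), after which later
   optimizations can fold the body of the specialized add down to 15.  */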
#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"
template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is the scalar
     value of the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring
     about in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about
     in its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected
     component of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For example:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all the bits above the
   least significant byte set to 0.  Hence the mask of 'x' would be 0xff.
   The mask reflects that the bits of the least significant byte are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */
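/* As a further invented illustration of the <value, mask> encoding (assuming
   8-bit precision for brevity): meeting the two incoming constants 0x12 and
   0x32, each with mask 0, produces mask (0x12 ^ 0x32) = 0x20, i.e. only bit 5
   becomes unknown, and the propagated value m_value & ~m_mask = 0x12 keeps
   all remaining bits known.  */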
class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING }
    m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are
     constant.  If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  Value_Range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const vrange &p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init (tree type);
  void print (FILE * f);

private:
  bool meet_with_1 (const vrange &other_vr);
};

inline void
ipcp_vr_lattice::init (tree type)
{
  if (type)
    m_vr.set_type (type);

  // Otherwise m_vr will default to unsupported_range.
}
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a
   parameter plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic contexts of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			    DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
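/* One illustrative consequence of the CONST_DECL case above: two ADDR_EXPRs
   taking the addresses of two distinct CONST_DECLs still compare equal as
   long as the constants in their DECL_INITIAL compare equal, presumably
   because equal constants may be represented by distinct decls.  */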
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}
/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, " ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
/* Print the bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}
/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, " param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, " ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, " ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, " virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, " AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, " AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}
/* Determine whether it is at all technically possible to create clones of
   NODE and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};
/* Initialize fields of STATS to zeroes and optionally set it up so that calls
   from ITSELF (i.e. self-recursive calls) are counted separately into
   rec_count_sum.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls ++;
      }
  return false;
}
/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
					    false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}
/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it
   previously was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
/* Meet the current value of the lattice with the lattice OTHER.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}
/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}
/* Set the lattice value to bottom, if it isn't already the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}
/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
/* Meet operation, similar to ccp_lattice_meet: we xor the values, and if
   this->value and VALUE differ at some bit position, we drop that bit to
   varying.  Return true if the mask has changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
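/* A small worked example of the meet above (illustrative only): meeting a
   lattice holding <m_value = 0x4, m_mask = 0x1> with <value = 0x6,
   mask = 0x1> computes m_mask = (0x1 | 0x1) | (0x4 ^ 0x6) = 0x3 and then
   m_value = 0x4 & ~0x3 = 0x4, i.e. bit 2 stays known to be one while the
   two low bits become unknown, and true is returned because the mask has
   changed.  */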
/* Meet the bits lattice with operand
   described by <VALUE, MASK, PRECISION>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
		       o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}
/* Dump the contents of the list to F.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
	       av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
	fprintf (f, "(by_ref)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr.  */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}
/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
			[] (const ipa_argagg_value &elt,
			    const ipa_argagg_value &val)
    {
      if (elt.index < val.index)
	return true;
      if (elt.index > val.index)
	return false;
      if (elt.unit_offset < val.unit_offset)
	return true;
      return false;
    });

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
		  || prev_index < av.index
		  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
	  && av.unit_offset == unit_offset)
	slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
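/* Note that the binary search above relies on m_elts being sorted by
   <index, unit_offset>; e.g. a list that the dump method above would print
   as " 0[0]=1, 0[8]=2, 2[0]=3" (an invented example) is correctly ordered.
   The verification loop guarded by flag_checking asserts exactly this
   invariant.  */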
/* Return the first item describing a constant stored for parameter with
   INDEX, regardless of offset or reference, or NULL if there is no such
   constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
			[] (const ipa_argagg_value &elt, unsigned idx)
    {
      return elt.index < idx;
    });
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
				  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}
/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
	     && (m_elts[j].index < other_index
		 || (m_elts[j].index == other_index
		     && m_elts[j].unit_offset < other_offset)))
	j++;

      if (j >= m_elts.size ()
	  || m_elts[j].index != other_index
	  || m_elts[j].unit_offset != other_offset
	  || m_elts[j].by_ref != other.m_elts[i].by_ref
	  || !m_elts[j].value
	  || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
	return false;
    }
  return true;
}
/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
					     unsigned dest_index,
					     unsigned unit_delta,
					     vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
	return;
      if (av->index == src_index
	  && (av->unit_offset >= unit_delta)
	  && av->value)
	{
	  ipa_argagg_value new_av;
	  gcc_checking_assert (av->value);
	  new_av.value = av->value;
	  new_av.unit_offset = av->unit_offset - unit_delta;
	  new_av.index = dest_index;
	  new_av.by_ref = av->by_ref;

	  /* Quick check that the offsets we push are indeed increasing.  */
	  gcc_assert (first
		      || new_av.unit_offset > prev_unit_offset);
	  prev_unit_offset = new_av.unit_offset;
	  first = false;

	  res->safe_push (new_av);
	}
    }
}
/* Push to RES information about single lattices describing aggregate values
   in PLATS as those describing parameter DEST_INDEX and the original offset
   minus UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
			    unsigned unit_delta,
			    vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ()
	&& (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
	ipa_argagg_value iav;
	iav.value = aglat->values->value;
	iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
	iav.index = dest_index;
	iav.by_ref = plats->aggs_by_ref;

	gcc_assert (first
		    || iav.unit_offset > prev_unit_offset);
	prev_unit_offset = iav.unit_offset;
	first = false;

	pushed_sth = true;
	res->safe_push (iav);
      }
  return pushed_sth;
}
/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
			const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
	continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
	     && (other[j].index < this_index
		 || (other[j].index == this_index
		     && other[j].unit_offset < this_offset)))
	j++;

      if (j >= other.length ())
	{
	  elts[i].value = NULL_TREE;
	  continue;
	}

      if (other[j].index == this_index
	  && other[j].unit_offset == this_offset
	  && other[j].by_ref == elts[i].by_ref
	  && other[j].value
	  && values_equal_for_ipcp_p (other[j].value, elts[i].value))
	valid_entries++;
      else
	elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called
   on the one caller of some other node.  Set the caller's corresponding
   flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
	info->node_calling_single_call = true;
	return true;
      }
  return false;
}
/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, " Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   " The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
	  || !ipa_get_type (info, i)
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.init (type);
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init (type);
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}
/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the 2nd operand for a binary operation.  RES_TYPE
   is the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
	return input;
      else
	return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
				  input,
				  ipa_get_jf_pass_through_operand (jfunc),
				  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)),
				  input,
				  build_int_cst (ptr_type_node,
						 byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}
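/* For example (an invented case for illustration): an ancestor jump function
   with a 32-bit offset applied to an invariant address &s builds the address
   &MEM_REF (&s, 4), i.e. the address four bytes into s, which is how e.g. an
   adjustment from a derived object to one of its bases can be expressed.  */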
/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (!info->lattices
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
1850 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1851 that INFO describes the caller node or the one it is inlined to, CS is the
1852 call graph edge corresponding to JFUNC and CSIDX index of the described
1853 parameter. */
1855 ipa_polymorphic_call_context
1856 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1857 ipa_jump_func *jfunc)
1859 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1860 ipa_polymorphic_call_context ctx;
1861 ipa_polymorphic_call_context *edge_ctx
1862 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1864 if (edge_ctx && !edge_ctx->useless_p ())
1865 ctx = *edge_ctx;
1867 if (jfunc->type == IPA_JF_PASS_THROUGH
1868 || jfunc->type == IPA_JF_ANCESTOR)
1870 ipa_polymorphic_call_context srcctx;
1871 int srcidx;
1872 bool type_preserved = true;
1873 if (jfunc->type == IPA_JF_PASS_THROUGH)
1875 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1876 return ctx;
1877 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1878 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1880 else
1882 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1883 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1885 if (info->ipcp_orig_node)
1887 if (info->known_contexts.exists ())
1888 srcctx = info->known_contexts[srcidx];
1890 else
1892 if (!info->lattices
1893 || srcidx >= ipa_get_param_count (info))
1894 return ctx;
1895 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1896 lat = ipa_get_poly_ctx_lat (info, srcidx);
1897 if (!lat->is_single_const ())
1898 return ctx;
1899 srcctx = lat->values->value;
1901 if (srcctx.useless_p ())
1902 return ctx;
1903 if (jfunc->type == IPA_JF_ANCESTOR)
1904 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1905 if (!type_preserved)
1906 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1907 srcctx.combine_with (ctx);
1908 return srcctx;
1911 return ctx;
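/* For illustration (hypothetical user code), a polymorphic context becomes a
   single known context in code such as:

     struct B { virtual void f (); };
     struct D : B { void f () override; };
     extern void callee (B *b);
     void caller () { D d; callee (&d); }

   The edge context of the argument records the exact dynamic type D, so the
   context returned above is not useless and can later drive
   devirtualization. */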
1914 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1915 DST_TYPE on value range in SRC_VR and store it into DST_VR. Return true if
1916 the result is a range that is neither VARYING nor UNDEFINED. */
1918 static bool
1919 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1920 const vrange &src_vr,
1921 enum tree_code operation,
1922 tree dst_type, tree src_type)
1924 if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
1925 return false;
1927 range_op_handler handler (operation);
1928 if (!handler)
1929 return false;
1931 Value_Range varying (dst_type);
1932 varying.set_varying (dst_type);
1934 return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
1935 && !dst_vr.varying_p ()
1936 && !dst_vr.undefined_p ());
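/* For illustration (hypothetical user code), folding a caller-side int range
   of [0, 100] through a NOP_EXPR conversion to an unsigned char parameter, as
   for:

     extern void callee (unsigned char c);
     void caller (int i) { if (i >= 0 && i <= 100) callee (i); }

   yields the usable range [0, 100] and the function returns true; had the
   folded result covered the whole unsigned char domain, it would return
   false. */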
1939 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1940 first be extracted into a vrange. */
1942 static bool
1943 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1944 const ipa_vr &src_vr,
1945 enum tree_code operation,
1946 tree dst_type, tree src_type)
1948 Value_Range tmp;
1949 src_vr.get_vrange (tmp);
1950 return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1951 dst_type, src_type);
1954 /* Determine the range of JFUNC given that INFO describes the caller node or
1955 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1956 and PARM_TYPE is the type of the parameter. */
1958 void
1959 ipa_value_range_from_jfunc (vrange &vr,
1960 ipa_node_params *info, cgraph_edge *cs,
1961 ipa_jump_func *jfunc, tree parm_type)
1963 vr.set_undefined ();
1965 if (jfunc->m_vr)
1966 ipa_vr_operation_and_type_effects (vr,
1967 *jfunc->m_vr,
1968 NOP_EXPR, parm_type,
1969 jfunc->m_vr->type ());
1970 if (vr.singleton_p ())
1971 return;
1972 if (jfunc->type == IPA_JF_PASS_THROUGH)
1974 int idx;
1975 ipcp_transformation *sum
1976 = ipcp_get_transformation_summary (cs->caller->inlined_to
1977 ? cs->caller->inlined_to
1978 : cs->caller);
1979 if (!sum || !sum->m_vr)
1980 return;
1982 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1984 if (!(*sum->m_vr)[idx].known_p ())
1985 return;
1986 tree vr_type = ipa_get_type (info, idx);
1987 Value_Range srcvr;
1988 (*sum->m_vr)[idx].get_vrange (srcvr);
1990 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1992 if (TREE_CODE_CLASS (operation) == tcc_unary)
1994 Value_Range res (vr_type);
1996 if (ipa_vr_operation_and_type_effects (res,
1997 srcvr,
1998 operation, parm_type,
1999 vr_type))
2000 vr.intersect (res);
2002 else
2004 Value_Range op_res (vr_type);
2005 Value_Range res (vr_type);
2006 tree op = ipa_get_jf_pass_through_operand (jfunc);
2007 Value_Range op_vr (vr_type);
2008 range_op_handler handler (operation);
2010 ipa_range_set_and_normalize (op_vr, op);
2012 if (!handler
2013 || !op_res.supports_type_p (vr_type)
2014 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
2015 op_res.set_varying (vr_type);
2017 if (ipa_vr_operation_and_type_effects (res,
2018 op_res,
2019 NOP_EXPR, parm_type,
2020 vr_type))
2021 vr.intersect (res);
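/* For illustration (hypothetical user code), a binary arithmetic pass-through
   such as the one built for

     extern void callee (int x);
     void caller (int y) { callee (y * 2); }

   takes the branch above for non-unary operations: given a recorded caller
   range of [1, 9] for y, MULT_EXPR with constant operand 2 folds to [2, 18],
   which is then intersected into the callee parameter's range. */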
2026 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
2027 single known constant value and if so, return it. Otherwise return NULL.
2028 NODE and INFO describe the caller node (or the one it is inlined to) and its
2029 related info. */
2031 tree
2032 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
2033 const ipa_agg_jf_item *item)
2035 tree value = NULL_TREE;
2036 int src_idx;
2038 if (item->offset < 0
2039 || item->jftype == IPA_JF_UNKNOWN
2040 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
2041 return NULL_TREE;
2043 if (item->jftype == IPA_JF_CONST)
2044 return item->value.constant;
2046 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2047 || item->jftype == IPA_JF_LOAD_AGG);
2049 src_idx = item->value.pass_through.formal_id;
2051 if (info->ipcp_orig_node)
2053 if (item->jftype == IPA_JF_PASS_THROUGH)
2054 value = info->known_csts[src_idx];
2055 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2057 ipa_argagg_value_list avl (ts);
2058 value = avl.get_value (src_idx,
2059 item->value.load_agg.offset / BITS_PER_UNIT,
2060 item->value.load_agg.by_ref);
2063 else if (info->lattices)
2065 class ipcp_param_lattices *src_plats
2066 = ipa_get_parm_lattices (info, src_idx);
2068 if (item->jftype == IPA_JF_PASS_THROUGH)
2070 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2072 if (!lat->is_single_const ())
2073 return NULL_TREE;
2075 value = lat->values->value;
2077 else if (src_plats->aggs
2078 && !src_plats->aggs_bottom
2079 && !src_plats->aggs_contain_variable
2080 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2082 struct ipcp_agg_lattice *aglat;
2084 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2086 if (aglat->offset > item->value.load_agg.offset)
2087 break;
2089 if (aglat->offset == item->value.load_agg.offset)
2091 if (aglat->is_single_const ())
2092 value = aglat->values->value;
2093 break;
2099 if (!value)
2100 return NULL_TREE;
2102 if (item->jftype == IPA_JF_LOAD_AGG)
2104 tree load_type = item->value.load_agg.type;
2105 tree value_type = TREE_TYPE (value);
2107 /* Ensure value type is compatible with load type. */
2108 if (!useless_type_conversion_p (load_type, value_type))
2109 return NULL_TREE;
2112 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2113 value,
2114 item->value.pass_through.operand,
2115 item->type);
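/* For illustration (hypothetical user code), aggregate-part jump functions
   with IPA_JF_CONST items would be built for code such as:

     struct S { int a; int b; };
     extern void callee (struct S *s);
     void caller (void) { struct S s; s.a = 7; s.b = 9; callee (&s); }

   The items covering fields a and b carry the constants 7 and 9, passed by
   reference, and the function above returns them directly. */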
2118 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2119 caller is inlined to) NODE, which is described by INFO, and push the results
2120 to RES as values passed in parameter DST_INDEX. */
2122 void
2123 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2124 ipa_agg_jump_function *agg_jfunc,
2125 unsigned dst_index,
2126 vec<ipa_argagg_value> *res)
2128 unsigned prev_unit_offset = 0;
2129 bool first = true;
2131 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2133 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2134 if (!value)
2135 continue;
2137 ipa_argagg_value iav;
2138 iav.value = value;
2139 iav.unit_offset = item.offset / BITS_PER_UNIT;
2140 iav.index = dst_index;
2141 iav.by_ref = agg_jfunc->by_ref;
2143 gcc_assert (first
2144 || iav.unit_offset > prev_unit_offset);
2145 prev_unit_offset = iav.unit_offset;
2146 first = false;
2148 res->safe_push (iav);
2152 /* If checking is enabled, verify that no lattice is in the TOP state, i.e.
2153 simultaneously not bottom, not containing a variable component and without
2154 any known value. */
2156 DEBUG_FUNCTION void
2157 ipcp_verify_propagated_values (void)
2159 struct cgraph_node *node;
2161 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2163 ipa_node_params *info = ipa_node_params_sum->get (node);
2164 if (!opt_for_fn (node->decl, flag_ipa_cp)
2165 || !opt_for_fn (node->decl, optimize))
2166 continue;
2167 int i, count = ipa_get_param_count (info);
2169 for (i = 0; i < count; i++)
2171 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2173 if (!lat->bottom
2174 && !lat->contains_variable
2175 && lat->values_count == 0)
2177 if (dump_file)
2179 symtab->dump (dump_file);
2180 fprintf (dump_file, "\nIPA lattices after constant "
2181 "propagation, before gcc_unreachable:\n");
2182 print_all_lattices (dump_file, true, false);
2185 gcc_unreachable ();
2191 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2193 static bool
2194 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2195 ipa_polymorphic_call_context y)
2197 return x.equal_to (y);
2201 /* Add a new value source to the value represented by THIS, marking that a
2202 value comes from edge CS and (if the underlying jump function is a
2203 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2204 parameter described by SRC_IDX. OFFSET is negative if the source was the
2205 scalar value of the parameter itself, otherwise it is the offset within an aggregate. */
2207 template <typename valtype>
2208 void
2209 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2210 int src_idx, HOST_WIDE_INT offset)
2212 ipcp_value_source<valtype> *src;
2214 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2215 src->offset = offset;
2216 src->cs = cs;
2217 src->val = src_val;
2218 src->index = src_idx;
2220 src->next = sources;
2221 sources = src;
2224 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2225 CST and its generation level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
2227 static ipcp_value<tree> *
2228 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2230 ipcp_value<tree> *val;
2232 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2233 val->value = cst;
2234 val->self_recursion_generated_level = same_lat_gen_level;
2235 return val;
2238 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2239 value to CTX and its generation level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
2241 static ipcp_value<ipa_polymorphic_call_context> *
2242 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2243 unsigned same_lat_gen_level)
2245 ipcp_value<ipa_polymorphic_call_context> *val;
2247 val = new (ipcp_poly_ctx_values_pool.allocate ())
2248 ipcp_value<ipa_polymorphic_call_context>();
2249 val->value = ctx;
2250 val->self_recursion_generated_level = same_lat_gen_level;
2251 return val;
2254 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2255 SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
2256 meaning. OFFSET -1 means the source is scalar and not a part of an
2257 aggregate. If non-NULL, VAL_P records address of existing or newly added
2258 ipcp_value.
2260 If the value is generated for a self-recursive call as a result of an
2261 arithmetic pass-through jump-function acting on a value in the same lattice,
2262 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2263 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2265 template <typename valtype>
2266 bool
2267 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2268 ipcp_value<valtype> *src_val,
2269 int src_idx, HOST_WIDE_INT offset,
2270 ipcp_value<valtype> **val_p,
2271 unsigned same_lat_gen_level)
2273 ipcp_value<valtype> *val, *last_val = NULL;
2275 if (val_p)
2276 *val_p = NULL;
2278 if (bottom)
2279 return false;
2281 for (val = values; val; last_val = val, val = val->next)
2282 if (values_equal_for_ipcp_p (val->value, newval))
2284 if (val_p)
2285 *val_p = val;
2287 if (val->self_recursion_generated_level < same_lat_gen_level)
2288 val->self_recursion_generated_level = same_lat_gen_level;
2290 if (ipa_edge_within_scc (cs))
2292 ipcp_value_source<valtype> *s;
2293 for (s = val->sources; s; s = s->next)
2294 if (s->cs == cs && s->val == src_val)
2295 break;
2296 if (s)
2297 return false;
2300 val->add_source (cs, src_val, src_idx, offset);
2301 return false;
2304 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2305 param_ipa_cp_value_list_size))
2307 /* We can only free sources, not the values themselves, because sources
2308 of other values in this SCC might point to them. */
2309 for (val = values; val; val = val->next)
2311 while (val->sources)
2313 ipcp_value_source<valtype> *src = val->sources;
2314 val->sources = src->next;
2315 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2318 values = NULL;
2319 return set_to_bottom ();
2322 values_count++;
2323 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2324 val->add_source (cs, src_val, src_idx, offset);
2325 val->next = NULL;
2327 /* Add the new value to the end of the value list, which can reduce the number
2328 of iterations of the propagation stage for recursive functions. */
2329 if (last_val)
2330 last_val->next = val;
2331 else
2332 values = val;
2334 if (val_p)
2335 *val_p = val;
2337 return true;
2340 /* A helper function that returns the result of the operation specified by
2341 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
2342 type for the value of SRC_VAL. If the operation is binary, OPND2 is a
2343 constant value acting as its second operand. If non-NULL, RES_TYPE is the
2344 expected type of the result. */
2346 static tree
2347 get_val_across_arith_op (enum tree_code opcode,
2348 tree opnd1_type,
2349 tree opnd2,
2350 ipcp_value<tree> *src_val,
2351 tree res_type)
2353 tree opnd1 = src_val->value;
2355 /* Skip source values that are incompatible with the specified type. */
2356 if (opnd1_type
2357 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2358 return NULL_TREE;
2360 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2363 /* Propagate values through an arithmetic transformation described by a jump
2364 function associated with edge CS, taking values from SRC_LAT and putting
2365 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2366 OPND2 is a constant value if transformation is a binary operation.
2367 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2368 a part of the aggregate. SRC_IDX is the index of the source parameter.
2369 RES_TYPE is the value type of result being propagated into. Return true if
2370 DEST_LAT changed. */
2372 static bool
2373 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2374 enum tree_code opcode,
2375 tree opnd1_type,
2376 tree opnd2,
2377 ipcp_lattice<tree> *src_lat,
2378 ipcp_lattice<tree> *dest_lat,
2379 HOST_WIDE_INT src_offset,
2380 int src_idx,
2381 tree res_type)
2383 ipcp_value<tree> *src_val;
2384 bool ret = false;
2386 /* Due to circular dependencies, propagating within an SCC through arithmetic
2387 transformation would create an infinite number of values. But for a
2388 self-feeding recursive function, we can allow propagation up to a limited
2389 count, which can enable a simple kind of recursive function versioning.
2390 In any other scenario, we just make the lattices bottom. */
2391 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2393 int i;
2395 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2396 param_ipa_cp_max_recursive_depth);
2397 if (src_lat != dest_lat || max_recursive_depth < 1)
2398 return dest_lat->set_contains_variable ();
2400 /* No benefit if the probability of recursive execution is low. */
2401 if (cs->sreal_frequency () * 100
2402 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2403 param_ipa_cp_min_recursive_probability))
2404 return dest_lat->set_contains_variable ();
2406 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2408 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2410 /* We currently do not use self-recursively generated values as propagation
2411 sources; this is absolutely conservative, but avoids an explosion of the
2412 lattice's value space, especially when one recursive function calls
2413 another recursive one. */
2414 if (src_val->self_recursion_generated_p ())
2416 ipcp_value_source<tree> *s;
2418 /* If the lattice has already been propagated for the call site,
2419 no need to do that again. */
2420 for (s = src_val->sources; s; s = s->next)
2421 if (s->cs == cs)
2422 return dest_lat->set_contains_variable ();
2424 else
2425 val_seeds.safe_push (src_val);
2428 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2430 /* Recursively generate lattice values with a limited count. */
2431 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2433 for (int j = 1; j < max_recursive_depth; j++)
2435 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2436 src_val, res_type);
2437 if (!cstval
2438 || !ipacp_value_safe_for_type (res_type, cstval))
2439 break;
2441 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2442 src_offset, &src_val, j);
2443 gcc_checking_assert (src_val);
2446 ret |= dest_lat->set_contains_variable ();
2448 else
2449 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2451 /* We do not use self-recursively generated values as propagation sources
2452 here either, otherwise the value space of a normal lattice could easily
2453 overflow. */
2454 if (src_val->self_recursion_generated_p ())
2456 ret |= dest_lat->set_contains_variable ();
2457 continue;
2460 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2461 src_val, res_type);
2462 if (cstval
2463 && ipacp_value_safe_for_type (res_type, cstval))
2464 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2465 src_offset);
2466 else
2467 ret |= dest_lat->set_contains_variable ();
2470 return ret;
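/* For illustration (hypothetical user code), the limited self-recursive
   propagation above is what enables recursive versioning of code such as:

     static int fact (int n) { return n <= 1 ? 1 : n * fact (n - 1); }
     int user (void) { return fact (8); }

   The n - 1 pass-through of the self-call lies within an SCC with
   src_lat == dest_lat; assuming the seed constant 8 arrives from user, the
   loop above generates 7, 6, ... until param_ipa_cp_max_recursive_depth is
   reached, after which the lattice is also marked as containing variable. */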
2473 /* Propagate values through a pass-through jump function JFUNC associated with
2474 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2475 is the index of the source parameter. PARM_TYPE is the type of the
2476 parameter to which the result is passed. */
2478 static bool
2479 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2480 ipcp_lattice<tree> *src_lat,
2481 ipcp_lattice<tree> *dest_lat, int src_idx,
2482 tree parm_type)
2484 return propagate_vals_across_arith_jfunc (cs,
2485 ipa_get_jf_pass_through_operation (jfunc),
2486 NULL_TREE,
2487 ipa_get_jf_pass_through_operand (jfunc),
2488 src_lat, dest_lat, -1, src_idx, parm_type);
2491 /* Propagate values through an ancestor jump function JFUNC associated with
2492 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2493 is the index of the source parameter. */
2495 static bool
2496 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2497 struct ipa_jump_func *jfunc,
2498 ipcp_lattice<tree> *src_lat,
2499 ipcp_lattice<tree> *dest_lat, int src_idx,
2500 tree param_type)
2502 ipcp_value<tree> *src_val;
2503 bool ret = false;
2505 if (ipa_edge_within_scc (cs))
2506 return dest_lat->set_contains_variable ();
2508 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2510 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2512 if (t && ipacp_value_safe_for_type (param_type, t))
2513 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2514 else
2515 ret |= dest_lat->set_contains_variable ();
2518 return ret;
2521 /* Propagate scalar values across jump function JFUNC that is associated with
2522 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2523 parameter to which the result is passed. */
2525 static bool
2526 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2527 struct ipa_jump_func *jfunc,
2528 ipcp_lattice<tree> *dest_lat,
2529 tree param_type)
2531 if (dest_lat->bottom)
2532 return false;
2534 if (jfunc->type == IPA_JF_CONST)
2536 tree val = ipa_get_jf_constant (jfunc);
2537 if (ipacp_value_safe_for_type (param_type, val))
2538 return dest_lat->add_value (val, cs, NULL, 0);
2539 else
2540 return dest_lat->set_contains_variable ();
2542 else if (jfunc->type == IPA_JF_PASS_THROUGH
2543 || jfunc->type == IPA_JF_ANCESTOR)
2545 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2546 ipcp_lattice<tree> *src_lat;
2547 int src_idx;
2548 bool ret;
2550 if (jfunc->type == IPA_JF_PASS_THROUGH)
2551 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2552 else
2553 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2555 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2556 if (src_lat->bottom)
2557 return dest_lat->set_contains_variable ();
2559 /* If we would need to clone the caller and cannot, do not propagate. */
2560 if (!ipcp_versionable_function_p (cs->caller)
2561 && (src_lat->contains_variable
2562 || (src_lat->values_count > 1)))
2563 return dest_lat->set_contains_variable ();
2565 if (jfunc->type == IPA_JF_PASS_THROUGH)
2566 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2567 dest_lat, src_idx,
2568 param_type);
2569 else
2570 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2571 src_idx, param_type);
2573 if (src_lat->contains_variable)
2574 ret |= dest_lat->set_contains_variable ();
2576 return ret;
2579 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2580 use them for indirect inlining); we should propagate them too. */
2581 return dest_lat->set_contains_variable ();
2584 /* Propagate polymorphic call contexts across jump function JFUNC that is
2585 associated with edge CS and describes argument IDX; put the values into DEST_LAT. */
2587 static bool
2588 propagate_context_across_jump_function (cgraph_edge *cs,
2589 ipa_jump_func *jfunc, int idx,
2590 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2592 if (dest_lat->bottom)
2593 return false;
2594 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2595 bool ret = false;
2596 bool added_sth = false;
2597 bool type_preserved = true;
2599 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2600 = ipa_get_ith_polymorhic_call_context (args, idx);
2602 if (edge_ctx_ptr)
2603 edge_ctx = *edge_ctx_ptr;
2605 if (jfunc->type == IPA_JF_PASS_THROUGH
2606 || jfunc->type == IPA_JF_ANCESTOR)
2608 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2609 int src_idx;
2610 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2612 /* TODO: Once we figure out how to propagate speculations, it will
2613 probably be a good idea to switch to speculation if type_preserved is
2614 not set instead of punting. */
2615 if (jfunc->type == IPA_JF_PASS_THROUGH)
2617 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2618 goto prop_fail;
2619 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2620 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2622 else
2624 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2625 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2628 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2629 /* If we would need to clone the caller and cannot, do not propagate. */
2630 if (!ipcp_versionable_function_p (cs->caller)
2631 && (src_lat->contains_variable
2632 || (src_lat->values_count > 1)))
2633 goto prop_fail;
2635 ipcp_value<ipa_polymorphic_call_context> *src_val;
2636 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2638 ipa_polymorphic_call_context cur = src_val->value;
2640 if (!type_preserved)
2641 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2642 if (jfunc->type == IPA_JF_ANCESTOR)
2643 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2644 /* TODO: In cases we know how the context is going to be used,
2645 we can improve the result by passing proper OTR_TYPE. */
2646 cur.combine_with (edge_ctx);
2647 if (!cur.useless_p ())
2649 if (src_lat->contains_variable
2650 && !edge_ctx.equal_to (cur))
2651 ret |= dest_lat->set_contains_variable ();
2652 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2653 added_sth = true;
2658 prop_fail:
2659 if (!added_sth)
2661 if (!edge_ctx.useless_p ())
2662 ret |= dest_lat->add_value (edge_ctx, cs);
2663 else
2664 ret |= dest_lat->set_contains_variable ();
2667 return ret;
2670 /* Propagate bits across jump function JFUNC that is associated with
2671 edge CS and update DEST_LATTICE accordingly. */
2673 bool
2674 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2675 ipa_jump_func *jfunc,
2676 ipcp_bits_lattice *dest_lattice)
2678 if (dest_lattice->bottom_p ())
2679 return false;
2681 enum availability availability;
2682 cgraph_node *callee = cs->callee->function_symbol (&availability);
2683 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2684 tree parm_type = ipa_get_type (callee_info, idx);
2686 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2687 transform for these cases. Similarly, we can have bad type mismatches
2688 with LTO, avoid doing anything with those too. */
2689 if (!parm_type
2690 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2692 if (dump_file && (dump_flags & TDF_DETAILS))
2693 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2694 "param %i of %s is NULL or unsuitable for bits propagation\n",
2695 idx, cs->callee->dump_name ());
2697 return dest_lattice->set_to_bottom ();
2700 unsigned precision = TYPE_PRECISION (parm_type);
2701 signop sgn = TYPE_SIGN (parm_type);
2703 if (jfunc->type == IPA_JF_PASS_THROUGH
2704 || jfunc->type == IPA_JF_ANCESTOR)
2706 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2707 tree operand = NULL_TREE;
2708 enum tree_code code;
2709 unsigned src_idx;
2710 bool keep_null = false;
2712 if (jfunc->type == IPA_JF_PASS_THROUGH)
2714 code = ipa_get_jf_pass_through_operation (jfunc);
2715 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2716 if (code != NOP_EXPR)
2717 operand = ipa_get_jf_pass_through_operand (jfunc);
2719 else
2721 code = POINTER_PLUS_EXPR;
2722 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2723 unsigned HOST_WIDE_INT offset
2724 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2725 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2726 operand = build_int_cstu (size_type_node, offset);
2729 class ipcp_param_lattices *src_lats
2730 = ipa_get_parm_lattices (caller_info, src_idx);
2732 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2733 For example, consider:
2734 int f(int x)
2735 {
2736 g (x & 0xff);
2737 }
2738 Assume the lattice for x is bottom; we can still propagate the mask of
2739 x & 0xff (only the low byte may be non-zero), which gets computed during
2740 the ccp1 pass and which we store in the jump function during the analysis stage. */
2742 if (!src_lats->bits_lattice.bottom_p ())
2744 bool drop_all_ones
2745 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2747 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2748 sgn, code, operand, drop_all_ones);
2752 if (jfunc->bits)
2753 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2754 precision);
2755 else
2756 return dest_lattice->set_to_bottom ();
2759 /* Propagate value range across jump function JFUNC that is associated with
2760 edge CS and whose corresponding callee parameter has type PARAM_TYPE, and
2761 update DEST_PLATS accordingly. */
2763 static bool
2764 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2765 class ipcp_param_lattices *dest_plats,
2766 tree param_type)
2768 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2770 if (dest_lat->bottom_p ())
2771 return false;
2773 if (!param_type
2774 || (!INTEGRAL_TYPE_P (param_type)
2775 && !POINTER_TYPE_P (param_type)))
2776 return dest_lat->set_to_bottom ();
2778 if (jfunc->type == IPA_JF_PASS_THROUGH)
2780 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2781 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2782 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2783 class ipcp_param_lattices *src_lats
2784 = ipa_get_parm_lattices (caller_info, src_idx);
2785 tree operand_type = ipa_get_type (caller_info, src_idx);
2787 if (src_lats->m_value_range.bottom_p ())
2788 return dest_lat->set_to_bottom ();
2790 Value_Range vr (operand_type);
2791 if (TREE_CODE_CLASS (operation) == tcc_unary)
2792 ipa_vr_operation_and_type_effects (vr,
2793 src_lats->m_value_range.m_vr,
2794 operation, param_type,
2795 operand_type);
2796 /* A crude way to prevent an unbounded number of value range updates
2797 in SCC components. We should allow a limited number of updates within
2798 an SCC, too. */
2799 else if (!ipa_edge_within_scc (cs))
2801 tree op = ipa_get_jf_pass_through_operand (jfunc);
2802 Value_Range op_vr (TREE_TYPE (op));
2803 Value_Range op_res (operand_type);
2804 range_op_handler handler (operation);
2806 ipa_range_set_and_normalize (op_vr, op);
2808 if (!handler
2809 || !op_res.supports_type_p (operand_type)
2810 || !handler.fold_range (op_res, operand_type,
2811 src_lats->m_value_range.m_vr, op_vr))
2812 op_res.set_varying (operand_type);
2814 ipa_vr_operation_and_type_effects (vr,
2815 op_res,
2816 NOP_EXPR, param_type,
2817 operand_type);
2819 if (!vr.undefined_p () && !vr.varying_p ())
2821 if (jfunc->m_vr)
2823 Value_Range jvr (param_type);
2824 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2825 NOP_EXPR,
2826 param_type,
2827 jfunc->m_vr->type ()))
2828 vr.intersect (jvr);
2830 return dest_lat->meet_with (vr);
2833 else if (jfunc->type == IPA_JF_CONST)
2835 tree val = ipa_get_jf_constant (jfunc);
2836 if (TREE_CODE (val) == INTEGER_CST)
2838 val = fold_convert (param_type, val);
2839 if (TREE_OVERFLOW_P (val))
2840 val = drop_tree_overflow (val);
2842 Value_Range tmpvr (val, val);
2843 return dest_lat->meet_with (tmpvr);
2847 Value_Range vr (param_type);
2848 if (jfunc->m_vr
2849 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2850 param_type,
2851 jfunc->m_vr->type ()))
2852 return dest_lat->meet_with (vr);
2853 else
2854 return dest_lat->set_to_bottom ();
2857 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2858 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2859 other cases, return false). If there are no aggregate items, set
2860 aggs_by_ref to NEW_AGGS_BY_REF. */
2862 static bool
2863 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2864 bool new_aggs_by_ref)
2866 if (dest_plats->aggs)
2868 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2870 set_agg_lats_to_bottom (dest_plats);
2871 return true;
2874 else
2875 dest_plats->aggs_by_ref = new_aggs_by_ref;
2876 return false;
2879 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***AGLAT is
2880 an already existing lattice for the given OFFSET and VAL_SIZE, marking all
2881 skipped lattices as containing variable and checking for overlaps. If there
2882 is no already existing lattice for the OFFSET and VAL_SIZE, create one,
2883 initialize it with offset, size and contains_variable set to PRE_EXISTING,
2884 and return true, unless there are too many lattices already. If there are
2885 too many, return false. If there are overlaps, turn the whole DEST_PLATS to
2886 bottom and return false. If any skipped lattices were newly marked as
2887 containing variable, set *CHANGE to true. MAX_AGG_ITEMS is the maximum number of lattices. */
2889 static bool
2890 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2891 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2892 struct ipcp_agg_lattice ***aglat,
2893 bool pre_existing, bool *change, int max_agg_items)
2895 gcc_checking_assert (offset >= 0);
2897 while (**aglat && (**aglat)->offset < offset)
2899 if ((**aglat)->offset + (**aglat)->size > offset)
2901 set_agg_lats_to_bottom (dest_plats);
2902 return false;
2904 *change |= (**aglat)->set_contains_variable ();
2905 *aglat = &(**aglat)->next;
2908 if (**aglat && (**aglat)->offset == offset)
2910 if ((**aglat)->size != val_size)
2912 set_agg_lats_to_bottom (dest_plats);
2913 return false;
2915 gcc_assert (!(**aglat)->next
2916 || (**aglat)->next->offset >= offset + val_size);
2917 return true;
2919 else
2921 struct ipcp_agg_lattice *new_al;
2923 if (**aglat && (**aglat)->offset < offset + val_size)
2925 set_agg_lats_to_bottom (dest_plats);
2926 return false;
2928 if (dest_plats->aggs_count == max_agg_items)
2929 return false;
2930 dest_plats->aggs_count++;
2931 new_al = ipcp_agg_lattice_pool.allocate ();
2932 memset (new_al, 0, sizeof (*new_al));
2934 new_al->offset = offset;
2935 new_al->size = val_size;
2936 new_al->contains_variable = pre_existing;
2938 new_al->next = **aglat;
2939 **aglat = new_al;
2940 return true;
2944 /* Mark AGLAT and all other aggregate lattices reachable through next pointers
2945 as containing an unknown value. */
2947 static bool
2948 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2950 bool ret = false;
2951 while (aglat)
2953 ret |= aglat->set_contains_variable ();
2954 aglat = aglat->next;
2956 return ret;
2959 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2960 OFFSET_DELTA. CS is the call graph edge and SRC_IDX is the index of the
2961 source parameter used for lattice value sources. Return true if DEST_PLATS changed
2962 in any way. */
2964 static bool
2965 merge_aggregate_lattices (struct cgraph_edge *cs,
2966 class ipcp_param_lattices *dest_plats,
2967 class ipcp_param_lattices *src_plats,
2968 int src_idx, HOST_WIDE_INT offset_delta)
2970 bool pre_existing = dest_plats->aggs != NULL;
2971 struct ipcp_agg_lattice **dst_aglat;
2972 bool ret = false;
2974 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2975 return true;
2976 if (src_plats->aggs_bottom)
2977 return set_agg_lats_contain_variable (dest_plats);
2978 if (src_plats->aggs_contain_variable)
2979 ret |= set_agg_lats_contain_variable (dest_plats);
2980 dst_aglat = &dest_plats->aggs;
2982 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2983 param_ipa_max_agg_items);
2984 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2985 src_aglat;
2986 src_aglat = src_aglat->next)
2988 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2990 if (new_offset < 0)
2991 continue;
2992 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2993 &dst_aglat, pre_existing, &ret, max_agg_items))
2995 struct ipcp_agg_lattice *new_al = *dst_aglat;
2997 dst_aglat = &(*dst_aglat)->next;
2998 if (src_aglat->bottom)
3000 ret |= new_al->set_contains_variable ();
3001 continue;
3003 if (src_aglat->contains_variable)
3004 ret |= new_al->set_contains_variable ();
3005 for (ipcp_value<tree> *val = src_aglat->values;
3006 val;
3007 val = val->next)
3008 ret |= new_al->add_value (val->value, cs, val, src_idx,
3009 src_aglat->offset);
3011 else if (dest_plats->aggs_bottom)
3012 return true;
3014 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
3015 return ret;
3018 /* Determine whether there is anything to propagate from SRC_PLATS through a
3019 pass-through JFUNC and if so, whether it conforms to the rules about
3020 propagating values passed by reference. */
3022 static bool
3023 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
3024 struct ipa_jump_func *jfunc)
3026 return src_plats->aggs
3027 && (!src_plats->aggs_by_ref
3028 || ipa_get_jf_pass_through_agg_preserved (jfunc));
3031 /* Propagate values through ITEM, jump function for a part of an aggregate,
3032 into corresponding aggregate lattice AGLAT. CS is the call graph edge
3033 associated with the jump function. Return true if AGLAT changed in any
3034 way. */
3036 static bool
3037 propagate_aggregate_lattice (struct cgraph_edge *cs,
3038 struct ipa_agg_jf_item *item,
3039 struct ipcp_agg_lattice *aglat)
3041 class ipa_node_params *caller_info;
3042 class ipcp_param_lattices *src_plats;
3043 struct ipcp_lattice<tree> *src_lat;
3044 HOST_WIDE_INT src_offset;
3045 int src_idx;
3046 tree load_type;
3047 bool ret;
3049 if (item->jftype == IPA_JF_CONST)
3051 tree value = item->value.constant;
3053 gcc_checking_assert (is_gimple_ip_invariant (value));
3054 return aglat->add_value (value, cs, NULL, 0);
3057 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
3058 || item->jftype == IPA_JF_LOAD_AGG);
3060 caller_info = ipa_node_params_sum->get (cs->caller);
3061 src_idx = item->value.pass_through.formal_id;
3062 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3064 if (item->jftype == IPA_JF_PASS_THROUGH)
3066 load_type = NULL_TREE;
3067 src_lat = &src_plats->itself;
3068 src_offset = -1;
3070 else
3072 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3073 struct ipcp_agg_lattice *src_aglat;
3075 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3076 if (src_aglat->offset >= load_offset)
3077 break;
3079 load_type = item->value.load_agg.type;
3080 if (!src_aglat
3081 || src_aglat->offset > load_offset
3082 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3083 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3084 return aglat->set_contains_variable ();
3086 src_lat = src_aglat;
3087 src_offset = load_offset;
3090 if (src_lat->bottom
3091 || (!ipcp_versionable_function_p (cs->caller)
3092 && !src_lat->is_single_const ()))
3093 return aglat->set_contains_variable ();
3095 ret = propagate_vals_across_arith_jfunc (cs,
3096 item->value.pass_through.operation,
3097 load_type,
3098 item->value.pass_through.operand,
3099 src_lat, aglat,
3100 src_offset,
3101 src_idx,
3102 item->type);
3104 if (src_lat->contains_variable)
3105 ret |= aglat->set_contains_variable ();
3107 return ret;
3110 /* Propagate aggregate values across jump function JFUNC that is associated
3111 with edge CS and put the values into DEST_PLATS. */
3113 static bool
3114 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3115 struct ipa_jump_func *jfunc,
3116 class ipcp_param_lattices *dest_plats)
3118 bool ret = false;
3120 if (dest_plats->aggs_bottom)
3121 return false;
3123 if (jfunc->type == IPA_JF_PASS_THROUGH
3124 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3126 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3127 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3128 class ipcp_param_lattices *src_plats;
3130 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3131 if (agg_pass_through_permissible_p (src_plats, jfunc))
3133 /* Currently we do not produce clobber aggregate jump
3134 functions, replace with merging when we do. */
3135 gcc_assert (!jfunc->agg.items);
3136 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3137 src_idx, 0);
3138 return ret;
3141 else if (jfunc->type == IPA_JF_ANCESTOR
3142 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3144 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3145 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3146 class ipcp_param_lattices *src_plats;
3148 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3149 if (src_plats->aggs && src_plats->aggs_by_ref)
3151 /* Currently we do not produce clobber aggregate jump
3152 functions, replace with merging when we do. */
3153 gcc_assert (!jfunc->agg.items);
3154 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3155 ipa_get_jf_ancestor_offset (jfunc));
3157 else if (!src_plats->aggs_by_ref)
3158 ret |= set_agg_lats_to_bottom (dest_plats);
3159 else
3160 ret |= set_agg_lats_contain_variable (dest_plats);
3161 return ret;
3164 if (jfunc->agg.items)
3166 bool pre_existing = dest_plats->aggs != NULL;
3167 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3168 struct ipa_agg_jf_item *item;
3169 int i;
3171 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3172 return true;
3174 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3175 param_ipa_max_agg_items);
3176 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3178 HOST_WIDE_INT val_size;
3180 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3181 continue;
3182 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3184 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3185 &aglat, pre_existing, &ret, max_agg_items))
3187 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3188 aglat = &(*aglat)->next;
3190 else if (dest_plats->aggs_bottom)
3191 return true;
3194 ret |= set_chain_of_aglats_contains_variable (*aglat);
3196 else
3197 ret |= set_agg_lats_contain_variable (dest_plats);
3199 return ret;
3202 /* Return true if on the way from CS->caller to the final (non-alias and
3203 non-thunk) destination, the call passes through a thunk. */
3205 static bool
3206 call_passes_through_thunk (cgraph_edge *cs)
3208 cgraph_node *alias_or_thunk = cs->callee;
3209 while (alias_or_thunk->alias)
3210 alias_or_thunk = alias_or_thunk->get_alias_target ();
3211 return alias_or_thunk->thunk;
3214 /* Propagate constants from the caller to the callee of CS and return true if
3215 any lattice of the callee changed. */
3217 static bool
3218 propagate_constants_across_call (struct cgraph_edge *cs)
3220 class ipa_node_params *callee_info;
3221 enum availability availability;
3222 cgraph_node *callee;
3223 class ipa_edge_args *args;
3224 bool ret = false;
3225 int i, args_count, parms_count;
3227 callee = cs->callee->function_symbol (&availability);
3228 if (!callee->definition)
3229 return false;
3230 gcc_checking_assert (callee->has_gimple_body_p ());
3231 callee_info = ipa_node_params_sum->get (callee);
3232 if (!callee_info)
3233 return false;
3235 args = ipa_edge_args_sum->get (cs);
3236 parms_count = ipa_get_param_count (callee_info);
3237 if (parms_count == 0)
3238 return false;
3239 if (!args
3240 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3241 || !opt_for_fn (cs->caller->decl, optimize))
3243 for (i = 0; i < parms_count; i++)
3244 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3245 i));
3246 return ret;
3248 args_count = ipa_get_cs_argument_count (args);
3250 /* If this call goes through a thunk we must not propagate to the first (0th)
3251 parameter. However, we might need to uncover a thunk from below a series
3252 of aliases first. */
3253 if (call_passes_through_thunk (cs))
3255 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3256 0));
3257 i = 1;
3259 else
3260 i = 0;
3262 for (; (i < args_count) && (i < parms_count); i++)
3264 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3265 class ipcp_param_lattices *dest_plats;
3266 tree param_type = ipa_get_type (callee_info, i);
3268 dest_plats = ipa_get_parm_lattices (callee_info, i);
3269 if (availability == AVAIL_INTERPOSABLE)
3270 ret |= set_all_contains_variable (dest_plats);
3271 else
3273 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3274 &dest_plats->itself,
3275 param_type);
3276 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3277 &dest_plats->ctxlat);
3279 |= propagate_bits_across_jump_function (cs, i, jump_func,
3280 &dest_plats->bits_lattice);
3281 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3282 dest_plats);
3283 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3284 ret |= propagate_vr_across_jump_function (cs, jump_func,
3285 dest_plats, param_type);
3286 else
3287 ret |= dest_plats->m_value_range.set_to_bottom ();
3290 for (; i < parms_count; i++)
3291 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3293 return ret;
3296 /* If an indirect edge IE can be turned into a direct one based on constants
3297 in KNOWN_CSTS, contexts in KNOWN_CONTEXTS and known aggregate values in AVS,
3298 return the destination. Set *SPECULATIVE to true if the returned target is
3299 only a speculative guess. */
3301 static tree
3302 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3303 const vec<tree> &known_csts,
3304 const vec<ipa_polymorphic_call_context> &known_contexts,
3305 const ipa_argagg_value_list &avs,
3306 bool *speculative)
3308 int param_index = ie->indirect_info->param_index;
3309 HOST_WIDE_INT anc_offset;
3310 tree t = NULL;
3311 tree target = NULL;
3313 *speculative = false;
3315 if (param_index == -1)
3316 return NULL_TREE;
3318 if (!ie->indirect_info->polymorphic)
3320 tree t = NULL;
3322 if (ie->indirect_info->agg_contents)
3324 t = NULL;
3325 if ((unsigned) param_index < known_csts.length ()
3326 && known_csts[param_index])
3327 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3328 ie->indirect_info->offset,
3329 ie->indirect_info->by_ref);
3331 if (!t && ie->indirect_info->guaranteed_unmodified)
3332 t = avs.get_value (param_index,
3333 ie->indirect_info->offset / BITS_PER_UNIT,
3334 ie->indirect_info->by_ref);
3336 else if ((unsigned) param_index < known_csts.length ())
3337 t = known_csts[param_index];
3339 if (t
3340 && TREE_CODE (t) == ADDR_EXPR
3341 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3342 return TREE_OPERAND (t, 0);
3343 else
3344 return NULL_TREE;
3347 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3348 return NULL_TREE;
3350 gcc_assert (!ie->indirect_info->agg_contents);
3351 gcc_assert (!ie->indirect_info->by_ref);
3352 anc_offset = ie->indirect_info->offset;
3354 t = NULL;
3356 if ((unsigned) param_index < known_csts.length ()
3357 && known_csts[param_index])
3358 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3359 ie->indirect_info->offset, true);
3361 /* Try to work out the value of the virtual table pointer from replacements
3362 or known aggregate values. */
3363 if (!t)
3364 t = avs.get_value (param_index,
3365 ie->indirect_info->offset / BITS_PER_UNIT,
3366 true);
3368 /* If we found the virtual table pointer, lookup the target. */
3369 if (t)
3371 tree vtable;
3372 unsigned HOST_WIDE_INT offset;
3373 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3375 bool can_refer;
3376 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3377 vtable, offset, &can_refer);
3378 if (can_refer)
3380 if (!target
3381 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3382 || !possible_polymorphic_call_target_p
3383 (ie, cgraph_node::get (target)))
3385 /* Do not speculate builtin_unreachable, it is stupid! */
3386 if (ie->indirect_info->vptr_changed)
3387 return NULL;
3388 target = ipa_impossible_devirt_target (ie, target);
3390 *speculative = ie->indirect_info->vptr_changed;
3391 if (!*speculative)
3392 return target;
3397 /* Do we know the constant value of pointer? */
3398 if (!t && (unsigned) param_index < known_csts.length ())
3399 t = known_csts[param_index];
3401 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3403 ipa_polymorphic_call_context context;
3404 if (known_contexts.length () > (unsigned int) param_index)
3406 context = known_contexts[param_index];
3407 context.offset_by (anc_offset);
3408 if (ie->indirect_info->vptr_changed)
3409 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3410 ie->indirect_info->otr_type);
3411 if (t)
3413 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3414 (t, ie->indirect_info->otr_type, anc_offset);
3415 if (!ctx2.useless_p ())
3416 context.combine_with (ctx2, ie->indirect_info->otr_type);
3419 else if (t)
3421 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3422 anc_offset);
3423 if (ie->indirect_info->vptr_changed)
3424 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3425 ie->indirect_info->otr_type);
3427 else
3428 return NULL_TREE;
3430 vec <cgraph_node *>targets;
3431 bool final;
3433 targets = possible_polymorphic_call_targets
3434 (ie->indirect_info->otr_type,
3435 ie->indirect_info->otr_token,
3436 context, &final);
3437 if (!final || targets.length () > 1)
3439 struct cgraph_node *node;
3440 if (*speculative)
3441 return target;
3442 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3443 || ie->speculative || !ie->maybe_hot_p ())
3444 return NULL;
3445 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3446 ie->indirect_info->otr_token,
3447 context);
3448 if (node)
3450 *speculative = true;
3451 target = node->decl;
3453 else
3454 return NULL;
3456 else
3458 *speculative = false;
3459 if (targets.length () == 1)
3460 target = targets[0]->decl;
3461 else
3462 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3465 if (target && !possible_polymorphic_call_target_p (ie,
3466 cgraph_node::get (target)))
3468 if (*speculative)
3469 return NULL;
3470 target = ipa_impossible_devirt_target (ie, target);
3473 return target;
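/* For illustration (hypothetical user code), the polymorphic path above can
   resolve code such as:

     struct B { virtual int f (); };
     struct D : B { int f () override; };
     static int call (B *b) { return b->f (); }
     int user (void) { D d; return call (&d); }

   With a known context recording the exact dynamic type D for the parameter
   of call, the possible polymorphic call targets reduce to the single final
   target D::f, which is returned non-speculatively. */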
3476 /* If an indirect edge IE can be turned into a direct one based on data in
3477 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3478 whether the discovered target is only a speculative guess. */
3480 tree
3481 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3482 ipa_call_arg_values *avals,
3483 bool *speculative)
3485 ipa_argagg_value_list avl (avals);
3486 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3487 avals->m_known_contexts,
3488 avl, speculative);
3491 /* Calculate devirtualization time bonus for NODE, assuming we know information
3492 about arguments stored in AVALS. */
3494 static int
3495 devirtualization_time_bonus (struct cgraph_node *node,
3496 ipa_auto_call_arg_values *avals)
3498 struct cgraph_edge *ie;
3499 int res = 0;
3501 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3503 struct cgraph_node *callee;
3504 class ipa_fn_summary *isummary;
3505 enum availability avail;
3506 tree target;
3507 bool speculative;
3509 ipa_argagg_value_list avl (avals);
3510 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3511 avals->m_known_contexts,
3512 avl, &speculative);
3513 if (!target)
3514 continue;
3516 /* Only bare minimum benefit for clearly un-inlineable targets. */
3517 res += 1;
3518 callee = cgraph_node::get (target);
3519 if (!callee || !callee->definition)
3520 continue;
3521 callee = callee->function_symbol (&avail);
3522 if (avail < AVAIL_AVAILABLE)
3523 continue;
3524 isummary = ipa_fn_summaries->get (callee);
3525 if (!isummary || !isummary->inlinable)
3526 continue;
3528 int size = ipa_size_summaries->get (callee)->size;
3529 /* FIXME: The values below need reconsidering and perhaps also
3530 integrating into the cost metrics, at least in some very basic way. */
3531 int max_inline_insns_auto
3532 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3533 if (size <= max_inline_insns_auto / 4)
3534 res += 31 / ((int)speculative + 1);
3535 else if (size <= max_inline_insns_auto / 2)
3536 res += 15 / ((int)speculative + 1);
3537 else if (size <= max_inline_insns_auto
3538 || DECL_DECLARED_INLINE_P (callee->decl))
3539 res += 7 / ((int)speculative + 1);
3542 return res;
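/* As a worked example of the bonuses above (assuming a hypothetical
   param_max_inline_insns_auto of 40): every resolved target adds 1, and on
   top of that a non-speculative target of size at most 10 adds 31, size at
   most 20 adds 15, and size at most 40 (or a target declared inline) adds 7;
   speculative targets receive roughly half of each of these bonuses. */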
3545 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3547 static int
3548 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3550 int result = 0;
3551 ipa_hints hints = estimates.hints;
3552 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3553 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3555 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3557 if (hints & INLINE_HINT_loop_iterations)
3558 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3560 if (hints & INLINE_HINT_loop_stride)
3561 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3563 return result;
3566 /* If there is a reason to penalize the function described by INFO in the
3567 cloning goodness evaluation, do so. */
3569 static inline sreal
3570 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3571 sreal evaluation)
3573 if (info->node_within_scc && !info->node_is_self_scc)
3574 evaluation = (evaluation
3575 * (100 - opt_for_fn (node->decl,
3576 param_ipa_cp_recursion_penalty))) / 100;
3578 if (info->node_calling_single_call)
3579 evaluation = (evaluation
3580 * (100 - opt_for_fn (node->decl,
3581 param_ipa_cp_single_call_penalty)))
3582 / 100;
3584 return evaluation;
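/* As a worked example (assuming a hypothetical
   param_ipa_cp_recursion_penalty of 40): an evaluation of 500 for a node
   within a non-self-recursive SCC becomes 500 * (100 - 40) / 100 == 300
   before it is compared against the evaluation threshold. */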
3587 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3588 and SIZE_COST, the sum of frequencies of incoming edges to the potential new
3589 clone in FREQ_SUM and the sum of their profile counts in COUNT_SUM. */
3591 static bool
3592 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3593 sreal freq_sum, profile_count count_sum,
3594 int size_cost)
3596 if (time_benefit == 0
3597 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3598 || node->optimize_for_size_p ())
3599 return false;
3601 gcc_assert (size_cost > 0);
3603 ipa_node_params *info = ipa_node_params_sum->get (node);
3604 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3605 if (count_sum.nonzero_p ())
3607 gcc_assert (base_count.nonzero_p ());
3608 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3609 sreal evaluation = (time_benefit * factor) / size_cost;
3610 evaluation = incorporate_penalties (node, info, evaluation);
3611 evaluation *= 1000;
3613 if (dump_file && (dump_flags & TDF_DETAILS))
3615 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3616 "size: %i, count_sum: ", time_benefit.to_double (),
3617 size_cost);
3618 count_sum.dump (dump_file);
3619 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3620 info->node_within_scc
3621 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3622 info->node_calling_single_call ? ", single_call" : "",
3623 evaluation.to_double (), eval_threshold);
3626 return evaluation.to_int () >= eval_threshold;
3628 else
3630 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3631 evaluation = incorporate_penalties (node, info, evaluation);
3632 evaluation *= 1000;
3634 if (dump_file && (dump_flags & TDF_DETAILS))
3635 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3636 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3637 "threshold: %i\n",
3638 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3639 info->node_within_scc
3640 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3641 info->node_calling_single_call ? ", single_call" : "",
3642 evaluation.to_double (), eval_threshold);
3644 return evaluation.to_int () >= eval_threshold;
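/* As a worked example of the profile-less branch above: a time benefit of
   2.0 at an incoming frequency sum of 3.0 and a size cost of 12 scores
   1000 * 2.0 * 3.0 / 12 == 500 (assuming no penalties apply), which passes
   a param_ipa_cp_eval_threshold of 500. */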
3648 /* Grow vectors in AVALS and fill them with information about values of
3649 parameters that are known to be independent of the context. Only calculate
3650 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3651 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3652 parameters will be stored in it.
3654 TODO: Also grow context independent value range vectors. */
3656 static bool
3657 gather_context_independent_values (class ipa_node_params *info,
3658 ipa_auto_call_arg_values *avals,
3659 bool calculate_aggs,
3660 int *removable_params_cost)
3662 int i, count = ipa_get_param_count (info);
3663 bool ret = false;
3665 avals->m_known_vals.safe_grow_cleared (count, true);
3666 avals->m_known_contexts.safe_grow_cleared (count, true);
3668 if (removable_params_cost)
3669 *removable_params_cost = 0;
3671 for (i = 0; i < count; i++)
3673 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3674 ipcp_lattice<tree> *lat = &plats->itself;
3676 if (lat->is_single_const ())
3678 ipcp_value<tree> *val = lat->values;
3679 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3680 avals->m_known_vals[i] = val->value;
3681 if (removable_params_cost)
3682 *removable_params_cost
3683 += estimate_move_cost (TREE_TYPE (val->value), false);
3684 ret = true;
3686 else if (removable_params_cost
3687 && !ipa_is_param_used (info, i))
3688 *removable_params_cost
3689 += ipa_get_param_move_cost (info, i);
3691 if (!ipa_is_param_used (info, i))
3692 continue;
3694 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3695 /* Do not account known context as reason for cloning. We can see
3696 if it permits devirtualization. */
3697 if (ctxlat->is_single_const ())
3698 avals->m_known_contexts[i] = ctxlat->values->value;
3700 if (calculate_aggs)
3701 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3704 return ret;
3707 /* Perform time and size measurement of NODE with the context given in AVALS,
3708 calculate the benefit compared to the node without specialization and store
3709 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3710 context-independent or unused removable parameters and EST_MOVE_COST, the
3711 estimated movement cost of the considered parameter. */
3713 static void
3714 perform_estimation_of_a_value (cgraph_node *node,
3715 ipa_auto_call_arg_values *avals,
3716 int removable_params_cost, int est_move_cost,
3717 ipcp_value_base *val)
3719 sreal time_benefit;
3720 ipa_call_estimates estimates;
3722 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3724 /* Extern inline functions have no cloning local time benefits because they
3725 will be inlined anyway. The only reason to clone them is if it enables
3726 optimization in any of the functions they call. */
3727 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3728 time_benefit = 0;
3729 else
3730 time_benefit = (estimates.nonspecialized_time - estimates.time)
3731 + (devirtualization_time_bonus (node, avals)
3732 + hint_time_bonus (node, estimates)
3733 + removable_params_cost + est_move_cost);
3735 int size = estimates.size;
3736 gcc_checking_assert (size >= 0);
3737 /* The inliner-heuristics based estimates may think that in certain
3738 contexts some functions do not have any size at all but we want
3739 all specializations to have at least a tiny cost, not least not to
3740 divide by zero. */
3741 if (size == 0)
3742 size = 1;
3744 val->local_time_benefit = time_benefit;
3745 val->local_size_cost = size;
3748 /* Get the overall limit of unit growth based on parameters extracted from
3749 NODE. It does not really make sense to mix functions with different overall
3750 growth limits, but it is possible, and if it happens, we do not want to
3751 select one limit at random. */
3753 static long
3754 get_max_overall_size (cgraph_node *node)
3756 long max_new_size = orig_overall_size;
3757 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3758 if (max_new_size < large_unit)
3759 max_new_size = large_unit;
3760 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3761 max_new_size += max_new_size * unit_growth / 100 + 1;
3762 return max_new_size;
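/* For illustration, with hypothetical parameter values: if the original
   overall size was 20000 insns, param_ipa_cp_large_unit_insns is 16000 and
   param_ipa_cp_unit_growth is 10, the returned limit is
   20000 + 20000 * 10 / 100 + 1 == 22001.  Units smaller than the large-unit
   threshold are first rounded up to it before the percentage is applied.  */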
3765 /* Return true if NODE should be cloned just for a parameter removal, possibly
3766 dumping a reason if not. */
3768 static bool
3769 clone_for_param_removal_p (cgraph_node *node)
3771 if (!node->can_change_signature)
3773 if (dump_file && (dump_flags & TDF_DETAILS))
3774 fprintf (dump_file, " Not considering cloning to remove parameters, "
3775 "function cannot change signature.\n");
3776 return false;
3778 if (node->can_be_local_p ())
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, " Not considering cloning to remove parameters, "
3782 "IPA-SRA can do it potentially better.\n");
3783 return false;
3785 return true;
3788 /* Iterate over known values of parameters of NODE and estimate the local
3789 effects in terms of time and size they have. */
3791 static void
3792 estimate_local_effects (struct cgraph_node *node)
3794 ipa_node_params *info = ipa_node_params_sum->get (node);
3795 int count = ipa_get_param_count (info);
3796 bool always_const;
3797 int removable_params_cost;
3799 if (!count || !ipcp_versionable_function_p (node))
3800 return;
3802 if (dump_file && (dump_flags & TDF_DETAILS))
3803 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3805 ipa_auto_call_arg_values avals;
3806 always_const = gather_context_independent_values (info, &avals, true,
3807 &removable_params_cost);
3808 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3809 if (always_const || devirt_bonus
3810 || (removable_params_cost && clone_for_param_removal_p (node)))
3812 struct caller_statistics stats;
3813 ipa_call_estimates estimates;
3815 init_caller_stats (&stats);
3816 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3817 false);
3818 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3819 sreal time = estimates.nonspecialized_time - estimates.time;
3820 time += devirt_bonus;
3821 time += hint_time_bonus (node, estimates);
3822 time += removable_params_cost;
3823 int size = estimates.size - stats.n_calls * removable_params_cost;
3825 if (dump_file)
3826 fprintf (dump_file, " - context independent values, size: %i, "
3827 "time_benefit: %f\n", size, time.to_double ());
3829 if (size <= 0 || node->local)
3831 info->do_clone_for_all_contexts = true;
3833 if (dump_file)
3834 fprintf (dump_file, " Decided to specialize for all "
3835 "known contexts, code not going to grow.\n");
3837 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3838 stats.count_sum, size))
3840 if (size + overall_size <= get_max_overall_size (node))
3842 info->do_clone_for_all_contexts = true;
3843 overall_size += size;
3845 if (dump_file)
3846 fprintf (dump_file, " Decided to specialize for all "
3847 "known contexts, growth (to %li) deemed "
3848 "beneficial.\n", overall_size);
3850 else if (dump_file && (dump_flags & TDF_DETAILS))
3851 fprintf (dump_file, " Not cloning for all contexts because "
3852 "maximum unit size would be reached with %li.\n",
3853 size + overall_size);
3855 else if (dump_file && (dump_flags & TDF_DETAILS))
3856 fprintf (dump_file, " Not cloning for all contexts because "
3857 "!good_cloning_opportunity_p.\n");
3861 for (int i = 0; i < count; i++)
3863 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3864 ipcp_lattice<tree> *lat = &plats->itself;
3865 ipcp_value<tree> *val;
3867 if (lat->bottom
3868 || !lat->values
3869 || avals.m_known_vals[i])
3870 continue;
3872 for (val = lat->values; val; val = val->next)
3874 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3875 avals.m_known_vals[i] = val->value;
3877 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3878 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3879 emc, val);
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3883 fprintf (dump_file, " - estimates for value ");
3884 print_ipcp_constant_value (dump_file, val->value);
3885 fprintf (dump_file, " for ");
3886 ipa_dump_param (dump_file, info, i);
3887 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3888 val->local_time_benefit.to_double (),
3889 val->local_size_cost);
3892 avals.m_known_vals[i] = NULL_TREE;
3895 for (int i = 0; i < count; i++)
3897 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3899 if (!plats->virt_call)
3900 continue;
3902 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3903 ipcp_value<ipa_polymorphic_call_context> *val;
3905 if (ctxlat->bottom
3906 || !ctxlat->values
3907 || !avals.m_known_contexts[i].useless_p ())
3908 continue;
3910 for (val = ctxlat->values; val; val = val->next)
3912 avals.m_known_contexts[i] = val->value;
3913 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3914 0, val);
3916 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, " - estimates for polymorphic context ");
3919 print_ipcp_constant_value (dump_file, val->value);
3920 fprintf (dump_file, " for ");
3921 ipa_dump_param (dump_file, info, i);
3922 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3923 val->local_time_benefit.to_double (),
3924 val->local_size_cost);
3927 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3930 unsigned all_ctx_len = avals.m_known_aggs.length ();
3931 auto_vec<ipa_argagg_value, 32> all_ctx;
3932 all_ctx.reserve_exact (all_ctx_len);
3933 all_ctx.splice (avals.m_known_aggs);
3934 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3936 unsigned j = 0;
3937 for (int index = 0; index < count; index++)
3939 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3941 if (plats->aggs_bottom || !plats->aggs)
3942 continue;
3944 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3946 ipcp_value<tree> *val;
3947 if (aglat->bottom || !aglat->values
3948 /* If the following is true, the one value is already part of all
3949 context estimations. */
3950 || (!plats->aggs_contain_variable
3951 && aglat->is_single_const ()))
3952 continue;
3954 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3955 while (j < all_ctx_len
3956 && (all_ctx[j].index < index
3957 || (all_ctx[j].index == index
3958 && all_ctx[j].unit_offset < unit_offset)))
3960 avals.m_known_aggs[j] = all_ctx[j];
3961 j++;
3964 for (unsigned k = j; k < all_ctx_len; k++)
3965 avals.m_known_aggs[k+1] = all_ctx[k];
3967 for (val = aglat->values; val; val = val->next)
3969 avals.m_known_aggs[j].value = val->value;
3970 avals.m_known_aggs[j].unit_offset = unit_offset;
3971 avals.m_known_aggs[j].index = index;
3972 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3974 perform_estimation_of_a_value (node, &avals,
3975 removable_params_cost, 0, val);
3977 if (dump_file && (dump_flags & TDF_DETAILS))
3979 fprintf (dump_file, " - estimates for value ");
3980 print_ipcp_constant_value (dump_file, val->value);
3981 fprintf (dump_file, " for ");
3982 ipa_dump_param (dump_file, info, index);
3983 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3984 "]: time_benefit: %g, size: %i\n",
3985 plats->aggs_by_ref ? "ref " : "",
3986 aglat->offset,
3987 val->local_time_benefit.to_double (),
3988 val->local_size_cost);
3996 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3997 topological sort of values. */
3999 template <typename valtype>
4000 void
4001 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
4003 ipcp_value_source<valtype> *src;
4005 if (cur_val->dfs)
4006 return;
4008 dfs_counter++;
4009 cur_val->dfs = dfs_counter;
4010 cur_val->low_link = dfs_counter;
4012 cur_val->topo_next = stack;
4013 stack = cur_val;
4014 cur_val->on_stack = true;
4016 for (src = cur_val->sources; src; src = src->next)
4017 if (src->val)
4019 if (src->val->dfs == 0)
4021 add_val (src->val);
4022 if (src->val->low_link < cur_val->low_link)
4023 cur_val->low_link = src->val->low_link;
4025 else if (src->val->on_stack
4026 && src->val->dfs < cur_val->low_link)
4027 cur_val->low_link = src->val->dfs;
4030 if (cur_val->dfs == cur_val->low_link)
4032 ipcp_value<valtype> *v, *scc_list = NULL;
4036 v = stack;
4037 stack = v->topo_next;
4038 v->on_stack = false;
4039 v->scc_no = cur_val->dfs;
4041 v->scc_next = scc_list;
4042 scc_list = v;
4044 while (v != cur_val);
4046 cur_val->topo_next = values_topo;
4047 values_topo = cur_val;
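/* The above is in essence Tarjan's strongly connected component algorithm:
   values get dfs and low_link numbers and are kept on an explicit stack.  As
   an illustrative example, if value A has source B and B has source A (a
   cycle created e.g. by a self-recursive pass-through), both receive the same
   scc_no and are chained together via scc_next, whereas an unrelated value C
   forms a singleton SCC of its own.  */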
4051 /* Add all values in lattices associated with NODE to the topological sort if
4052 they are not there yet. */
4054 static void
4055 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
4057 ipa_node_params *info = ipa_node_params_sum->get (node);
4058 int i, count = ipa_get_param_count (info);
4060 for (i = 0; i < count; i++)
4062 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
4063 ipcp_lattice<tree> *lat = &plats->itself;
4064 struct ipcp_agg_lattice *aglat;
4066 if (!lat->bottom)
4068 ipcp_value<tree> *val;
4069 for (val = lat->values; val; val = val->next)
4070 topo->constants.add_val (val);
4073 if (!plats->aggs_bottom)
4074 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4075 if (!aglat->bottom)
4077 ipcp_value<tree> *val;
4078 for (val = aglat->values; val; val = val->next)
4079 topo->constants.add_val (val);
4082 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
4083 if (!ctxlat->bottom)
4085 ipcp_value<ipa_polymorphic_call_context> *ctxval;
4086 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
4087 topo->contexts.add_val (ctxval);
4092 /* One pass of constant propagation along the call graph edges, from callers
4093 to callees (requires topological ordering in TOPO), iterating over strongly
4094 connected components. */
4096 static void
4097 propagate_constants_topo (class ipa_topo_info *topo)
4099 int i;
4101 for (i = topo->nnodes - 1; i >= 0; i--)
4103 unsigned j;
4104 struct cgraph_node *v, *node = topo->order[i];
4105 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
4107 /* First, iteratively propagate within the strongly connected component
4108 until all lattices stabilize. */
4109 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4110 if (v->has_gimple_body_p ())
4112 if (opt_for_fn (v->decl, flag_ipa_cp)
4113 && opt_for_fn (v->decl, optimize))
4114 push_node_to_stack (topo, v);
4115 /* When V is not optimized, we cannot push it to the stack, but
4116 we still need to set the lattices of all its callees to bottom. */
4117 else
4119 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4120 propagate_constants_across_call (cs);
4124 v = pop_node_from_stack (topo);
4125 while (v)
4127 struct cgraph_edge *cs;
4128 class ipa_node_params *info = NULL;
4129 bool self_scc = true;
4131 for (cs = v->callees; cs; cs = cs->next_callee)
4132 if (ipa_edge_within_scc (cs))
4134 cgraph_node *callee = cs->callee->function_symbol ();
4136 if (v != callee)
4137 self_scc = false;
4139 if (!info)
4141 info = ipa_node_params_sum->get (v);
4142 info->node_within_scc = true;
4145 if (propagate_constants_across_call (cs))
4146 push_node_to_stack (topo, callee);
4149 if (info)
4150 info->node_is_self_scc = self_scc;
4152 v = pop_node_from_stack (topo);
4155 /* Afterwards, propagate along edges leading out of the SCC, calculate
4156 the local effects of the discovered constants and add all valid values to
4157 their topological sort. */
4158 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4159 if (v->has_gimple_body_p ()
4160 && opt_for_fn (v->decl, flag_ipa_cp)
4161 && opt_for_fn (v->decl, optimize))
4163 struct cgraph_edge *cs;
4165 estimate_local_effects (v);
4166 add_all_node_vals_to_toposort (v, topo);
4167 for (cs = v->callees; cs; cs = cs->next_callee)
4168 if (!ipa_edge_within_scc (cs))
4169 propagate_constants_across_call (cs);
4171 cycle_nodes.release ();
4175 /* Propagate the estimated effects of individual values along the topological
4176 order from the dependent values to those they depend on. */
4178 template <typename valtype>
4179 void
4180 value_topo_info<valtype>::propagate_effects ()
4182 ipcp_value<valtype> *base;
4183 hash_set<ipcp_value<valtype> *> processed_srcvals;
4185 for (base = values_topo; base; base = base->topo_next)
4187 ipcp_value_source<valtype> *src;
4188 ipcp_value<valtype> *val;
4189 sreal time = 0;
4190 HOST_WIDE_INT size = 0;
4192 for (val = base; val; val = val->scc_next)
4194 time = time + val->local_time_benefit + val->prop_time_benefit;
4195 size = size + val->local_size_cost + val->prop_size_cost;
4198 for (val = base; val; val = val->scc_next)
4200 processed_srcvals.empty ();
4201 for (src = val->sources; src; src = src->next)
4202 if (src->val
4203 && src->cs->maybe_hot_p ())
4205 if (!processed_srcvals.add (src->val))
4207 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4208 if (prop_size < INT_MAX)
4209 src->val->prop_size_cost = prop_size;
4210 else
4211 continue;
4214 int special_factor = 1;
4215 if (val->same_scc (src->val))
4216 special_factor
4217 = opt_for_fn (src->cs->caller->decl,
4218 param_ipa_cp_recursive_freq_factor);
4219 else if (val->self_recursion_generated_p ()
4220 && (src->cs->callee->function_symbol ()
4221 == src->cs->caller))
4223 int max_recur_gen_depth
4224 = opt_for_fn (src->cs->caller->decl,
4225 param_ipa_cp_max_recursive_depth);
4226 special_factor = max_recur_gen_depth
4227 - val->self_recursion_generated_level + 1;
4230 src->val->prop_time_benefit
4231 += time * special_factor * src->cs->sreal_frequency ();
4234 if (size < INT_MAX)
4236 val->prop_time_benefit = time;
4237 val->prop_size_cost = size;
4239 else
4241 val->prop_time_benefit = 0;
4242 val->prop_size_cost = 0;
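/* A worked example with hypothetical numbers: if an SCC consists of a single
   value with local_time_benefit 10 and prop_time_benefit 6, the accumulated
   time is 16.  A source value in a different SCC reached over a maybe-hot
   edge with sreal_frequency 2 gets special_factor 1 and therefore has
   16 * 1 * 2 == 32 added to its prop_time_benefit.  */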
4248 /* Callback for qsort to sort counts of all edges in descending order. */
4250 static int
4251 compare_edge_profile_counts (const void *a, const void *b)
4253 const profile_count *cnt1 = (const profile_count *) a;
4254 const profile_count *cnt2 = (const profile_count *) b;
4256 if (*cnt1 < *cnt2)
4257 return 1;
4258 if (*cnt1 > *cnt2)
4259 return -1;
4260 return 0;
4264 /* Propagate constants, polymorphic contexts and their effects from the
4265 summaries interprocedurally. */
4267 static void
4268 ipcp_propagate_stage (class ipa_topo_info *topo)
4270 struct cgraph_node *node;
4272 if (dump_file)
4273 fprintf (dump_file, "\n Propagating constants:\n\n");
4275 base_count = profile_count::uninitialized ();
4277 bool compute_count_base = false;
4278 unsigned base_count_pos_percent = 0;
4279 FOR_EACH_DEFINED_FUNCTION (node)
4281 if (node->has_gimple_body_p ()
4282 && opt_for_fn (node->decl, flag_ipa_cp)
4283 && opt_for_fn (node->decl, optimize))
4285 ipa_node_params *info = ipa_node_params_sum->get (node);
4286 determine_versionability (node, info);
4288 unsigned nlattices = ipa_get_param_count (info);
4289 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4290 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4291 initialize_node_lattices (node);
4293 ipa_size_summary *s = ipa_size_summaries->get (node);
4294 if (node->definition && !node->alias && s != NULL)
4295 overall_size += s->self_size;
4296 if (node->count.ipa ().initialized_p ())
4298 compute_count_base = true;
4299 unsigned pos_percent = opt_for_fn (node->decl,
4300 param_ipa_cp_profile_count_base);
4301 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4305 if (compute_count_base)
4307 auto_vec<profile_count> all_edge_counts;
4308 all_edge_counts.reserve_exact (symtab->edges_count);
4309 FOR_EACH_DEFINED_FUNCTION (node)
4310 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4312 profile_count count = cs->count.ipa ();
4313 if (!count.nonzero_p ())
4314 continue;
4316 enum availability avail;
4317 cgraph_node *tgt
4318 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4319 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4320 if (info && info->versionable)
4321 all_edge_counts.quick_push (count);
4324 if (!all_edge_counts.is_empty ())
4326 gcc_assert (base_count_pos_percent <= 100);
4327 all_edge_counts.qsort (compare_edge_profile_counts);
4329 unsigned base_count_pos
4330 = (all_edge_counts.length () * base_count_pos_percent) / 100;
4331 base_count = all_edge_counts[base_count_pos];
4333 if (dump_file)
4335 fprintf (dump_file, "\nSelected base_count from %u edges at "
4336 "position %u, arriving at: ", all_edge_counts.length (),
4337 base_count_pos);
4338 base_count.dump (dump_file);
4339 fprintf (dump_file, "\n");
4342 else if (dump_file)
4343 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4344 "continuing as if without profile feedback.\n");
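/* As an example with a hypothetical profile: if there are 10 candidate edges
   whose counts sorted in descending order start with 900 and 500, and
   base_count_pos_percent is 20, then base_count_pos is 10 * 20 / 100 == 2
   and base_count becomes the third largest edge count.  */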
4347 orig_overall_size = overall_size;
4349 if (dump_file)
4350 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4352 propagate_constants_topo (topo);
4353 if (flag_checking)
4354 ipcp_verify_propagated_values ();
4355 topo->constants.propagate_effects ();
4356 topo->contexts.propagate_effects ();
4358 if (dump_file)
4360 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4361 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4365 /* Discover newly direct outgoing edges from NODE which is a new clone with
4366 known KNOWN_CSTS and make them direct. */
4368 static void
4369 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4370 vec<tree> known_csts,
4371 vec<ipa_polymorphic_call_context>
4372 known_contexts,
4373 vec<ipa_argagg_value, va_gc> *aggvals)
4375 struct cgraph_edge *ie, *next_ie;
4376 bool found = false;
4378 for (ie = node->indirect_calls; ie; ie = next_ie)
4380 tree target;
4381 bool speculative;
4383 next_ie = ie->next_callee;
4384 ipa_argagg_value_list avs (aggvals);
4385 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4386 avs, &speculative);
4387 if (target)
4389 bool agg_contents = ie->indirect_info->agg_contents;
4390 bool polymorphic = ie->indirect_info->polymorphic;
4391 int param_index = ie->indirect_info->param_index;
4392 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4393 speculative);
4394 found = true;
4396 if (cs && !agg_contents && !polymorphic)
4398 ipa_node_params *info = ipa_node_params_sum->get (node);
4399 int c = ipa_get_controlled_uses (info, param_index);
4400 if (c != IPA_UNDESCRIBED_USE
4401 && !ipa_get_param_load_dereferenced (info, param_index))
4403 struct ipa_ref *to_del;
4405 c--;
4406 ipa_set_controlled_uses (info, param_index, c);
4407 if (dump_file && (dump_flags & TDF_DETAILS))
4408 fprintf (dump_file, " controlled uses count of param "
4409 "%i bumped down to %i\n", param_index, c);
4410 if (c == 0
4411 && (to_del = node->find_reference (cs->callee, NULL, 0,
4412 IPA_REF_ADDR)))
4414 if (dump_file && (dump_flags & TDF_DETAILS))
4415 fprintf (dump_file, " and even removing its "
4416 "cloning-created reference\n");
4417 to_del->remove_reference ();
4423 /* Turning indirect calls into direct calls will improve the overall summary. */
4424 if (found)
4425 ipa_update_overall_fn_summary (node);
4428 class edge_clone_summary;
4429 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4431 /* Edge clone summary. */
4433 class edge_clone_summary
4435 public:
4436 /* Default constructor. */
4437 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4439 /* Default destructor. */
4440 ~edge_clone_summary ()
4442 if (prev_clone)
4443 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4444 if (next_clone)
4445 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4448 cgraph_edge *prev_clone;
4449 cgraph_edge *next_clone;
4452 class edge_clone_summary_t:
4453 public call_summary <edge_clone_summary *>
4455 public:
4456 edge_clone_summary_t (symbol_table *symtab):
4457 call_summary <edge_clone_summary *> (symtab)
4459 m_initialize_when_cloning = true;
4462 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4463 edge_clone_summary *src_data,
4464 edge_clone_summary *dst_data) final override;
4467 /* Edge duplication hook. */
4469 void
4470 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4471 edge_clone_summary *src_data,
4472 edge_clone_summary *dst_data)
4474 if (src_data->next_clone)
4475 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4476 dst_data->prev_clone = src_edge;
4477 dst_data->next_clone = src_data->next_clone;
4478 src_data->next_clone = dst_edge;
4481 /* Return true if CS calls DEST or its clone for all contexts. When
4482 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4483 edges from/to an all-context clone. */
4485 static bool
4486 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4487 bool allow_recursion_to_clone)
4489 enum availability availability;
4490 cgraph_node *callee = cs->callee->function_symbol (&availability);
4492 if (availability <= AVAIL_INTERPOSABLE)
4493 return false;
4494 if (callee == dest)
4495 return true;
4496 if (!allow_recursion_to_clone && cs->caller == callee)
4497 return false;
4499 ipa_node_params *info = ipa_node_params_sum->get (callee);
4500 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4503 /* Return true if edge CS does bring about the value described by SRC to
4504 DEST_VAL of node DEST or its clone for all contexts. */
4506 static bool
4507 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4508 cgraph_node *dest, ipcp_value<tree> *dest_val)
4510 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4512 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4513 || caller_info->node_dead)
4514 return false;
4516 if (!src->val)
4517 return true;
4519 if (caller_info->ipcp_orig_node)
4521 tree t = NULL_TREE;
4522 if (src->offset == -1)
4523 t = caller_info->known_csts[src->index];
4524 else if (ipcp_transformation *ts
4525 = ipcp_get_transformation_summary (cs->caller))
4527 ipa_argagg_value_list avl (ts);
4528 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4530 return (t != NULL_TREE
4531 && values_equal_for_ipcp_p (src->val->value, t));
4533 else
4535 if (src->val == dest_val)
4536 return true;
4538 struct ipcp_agg_lattice *aglat;
4539 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4540 src->index);
4541 if (src->offset == -1)
4542 return (plats->itself.is_single_const ()
4543 && values_equal_for_ipcp_p (src->val->value,
4544 plats->itself.values->value));
4545 else
4547 if (plats->aggs_bottom || plats->aggs_contain_variable)
4548 return false;
4549 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4550 if (aglat->offset == src->offset)
4551 return (aglat->is_single_const ()
4552 && values_equal_for_ipcp_p (src->val->value,
4553 aglat->values->value));
4555 return false;
4559 /* Return true if edge CS does bring about the value described by SRC to
4560 DST_VAL of node DEST or its clone for all contexts. */
4562 static bool
4563 cgraph_edge_brings_value_p (cgraph_edge *cs,
4564 ipcp_value_source<ipa_polymorphic_call_context> *src,
4565 cgraph_node *dest,
4566 ipcp_value<ipa_polymorphic_call_context> *)
4568 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4570 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4571 || caller_info->node_dead)
4572 return false;
4573 if (!src->val)
4574 return true;
4576 if (caller_info->ipcp_orig_node)
4577 return (caller_info->known_contexts.length () > (unsigned) src->index)
4578 && values_equal_for_ipcp_p (src->val->value,
4579 caller_info->known_contexts[src->index]);
4581 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4582 src->index);
4583 return plats->ctxlat.is_single_const ()
4584 && values_equal_for_ipcp_p (src->val->value,
4585 plats->ctxlat.values->value);
4588 /* Get the next clone in the linked list of clones of an edge. */
4590 static inline struct cgraph_edge *
4591 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4593 edge_clone_summary *s = edge_clone_summaries->get (cs);
4594 return s != NULL ? s->next_clone : NULL;
4597 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4598 of them is viable and hot, return true. In that case, for those that still
4599 hold, add their edge frequency and their number and cumulative profile
4600 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4601 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4603 template <typename valtype>
4604 static bool
4605 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4606 sreal *freq_sum, int *caller_count,
4607 profile_count *rec_count_sum,
4608 profile_count *nonrec_count_sum)
4610 ipcp_value_source<valtype> *src;
4611 sreal freq = 0;
4612 int count = 0;
4613 profile_count rec_cnt = profile_count::zero ();
4614 profile_count nonrec_cnt = profile_count::zero ();
4615 bool hot = false;
4616 bool non_self_recursive = false;
4618 for (src = val->sources; src; src = src->next)
4620 struct cgraph_edge *cs = src->cs;
4621 while (cs)
4623 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4625 count++;
4626 freq += cs->sreal_frequency ();
4627 hot |= cs->maybe_hot_p ();
4628 if (cs->caller != dest)
4630 non_self_recursive = true;
4631 if (cs->count.ipa ().initialized_p ())
4632 rec_cnt += cs->count.ipa ();
4634 else if (cs->count.ipa ().initialized_p ())
4635 nonrec_cnt += cs->count.ipa ();
4637 cs = get_next_cgraph_edge_clone (cs);
4641 /* If the only edges bringing a value are self-recursive ones, do not bother
4642 evaluating it. */
4643 if (!non_self_recursive)
4644 return false;
4646 *freq_sum = freq;
4647 *caller_count = count;
4648 *rec_count_sum = rec_cnt;
4649 *nonrec_count_sum = nonrec_cnt;
4651 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4653 struct cgraph_edge *cs;
4655 /* A cold non-SCC source edge could still trigger hot recursive execution of
4656 the function. Consider such a case as hot and rely on the following cost
4657 model computation to select the right candidate. */
4658 for (cs = dest->callers; cs; cs = cs->next_caller)
4659 if (cs->caller == dest && cs->maybe_hot_p ())
4660 return true;
4663 return hot;
4666 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4667 to let a non-self-recursive caller be the first element. Thus, we can
4668 simplify intersecting operations on values that arrive from all of these
4669 callers, especially when a self-recursive call exists. Return true if
4670 this kind of adjustment is possible. */
4672 static bool
4673 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4674 cgraph_node *node)
4676 for (unsigned i = 0; i < callers.length (); i++)
4678 cgraph_edge *cs = callers[i];
4680 if (cs->caller != node)
4682 if (i > 0)
4684 callers[i] = callers[0];
4685 callers[0] = cs;
4687 return true;
4690 return false;
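/* For instance, for CALLERS {rec, rec, other}, where the rec edges are
   self-recursive calls of NODE, the vector is reordered to {other, rec, rec}
   and true is returned; if all callers are self-recursive, the vector is left
   untouched and false is returned.  */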
4693 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4694 is assumed their number is known and equal to CALLER_COUNT. */
4696 template <typename valtype>
4697 static vec<cgraph_edge *>
4698 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4699 int caller_count)
4701 ipcp_value_source<valtype> *src;
4702 vec<cgraph_edge *> ret;
4704 ret.create (caller_count);
4705 for (src = val->sources; src; src = src->next)
4707 struct cgraph_edge *cs = src->cs;
4708 while (cs)
4710 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4711 ret.quick_push (cs);
4712 cs = get_next_cgraph_edge_clone (cs);
4716 if (caller_count > 1)
4717 adjust_callers_for_value_intersection (ret, dest);
4719 return ret;
4722 /* Construct a replacement map for a known VALUE for the formal parameter
4723 with index PARM_NUM. Return it or NULL if it cannot be created. FORCE_LOAD_REF
4724 should be set to true when the reference created for the constant should be
4725 a load one and not an address one because the corresponding parameter p is
4726 only used as *p. */
4728 static struct ipa_replace_map *
4729 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4730 bool force_load_ref)
4732 struct ipa_replace_map *replace_map;
4734 replace_map = ggc_alloc<ipa_replace_map> ();
4735 if (dump_file)
4737 fprintf (dump_file, " replacing ");
4738 ipa_dump_param (dump_file, info, parm_num);
4740 fprintf (dump_file, " with const ");
4741 print_generic_expr (dump_file, value);
4743 if (force_load_ref)
4744 fprintf (dump_file, " - forcing load reference\n");
4745 else
4746 fprintf (dump_file, "\n");
4748 replace_map->parm_num = parm_num;
4749 replace_map->new_tree = value;
4750 replace_map->force_load_ref = force_load_ref;
4751 return replace_map;
4754 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4755 one, otherwise it will be referred to as the original node. */
4757 static void
4758 dump_profile_updates (cgraph_node *node, bool spec)
4760 if (spec)
4761 fprintf (dump_file, " setting count of the specialized node %s to ",
4762 node->dump_name ());
4763 else
4764 fprintf (dump_file, " setting count of the original node %s to ",
4765 node->dump_name ());
4767 node->count.dump (dump_file);
4768 fprintf (dump_file, "\n");
4769 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4771 fprintf (dump_file, " edge to %s has count ",
4772 cs->callee->dump_name ());
4773 cs->count.dump (dump_file);
4774 fprintf (dump_file, "\n");
4778 /* With a partial training run we do not want to assume that the original's
4779 count is zero whenever we redirect all executed edges to a clone. Simply drop
4780 the profile to a local one in this case. In any case, return the new value.
4781 ORIG_NODE is the original node and its count has not been updated yet. */
4783 profile_count
4784 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4786 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4787 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4788 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4789 remainder = remainder.guessed_local ();
4791 return remainder;
4794 /* Structure to sum counts coming from nodes other than the original node and
4795 its clones. */
4797 struct gather_other_count_struct
4799 cgraph_node *orig;
4800 profile_count other_count;
4803 /* Worker callback of call_for_symbol_thunks_and_aliases summing the profile
4804 counts that come from non-self-recursive calls. */
4806 static bool
4807 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4809 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4810 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4811 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4812 desc->other_count += cs->count.ipa ();
4813 return false;
4816 /* Structure to help analyze if we need to boost counts of some clones of some
4817 non-recursive edges to match the new callee count. */
4819 struct desc_incoming_count_struct
4821 cgraph_node *orig;
4822 hash_set <cgraph_edge *> *processed_edges;
4823 profile_count count;
4824 unsigned unproc_orig_rec_edges;
4827 /* Go over edges calling NODE and its thunks and gather information about
4828 incoming counts so that we know if we need to make any adjustments. */
4830 static void
4831 analyze_clone_icoming_counts (cgraph_node *node,
4832 desc_incoming_count_struct *desc)
4834 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4835 if (cs->caller->thunk)
4837 analyze_clone_icoming_counts (cs->caller, desc);
4838 continue;
4840 else
4842 if (cs->count.initialized_p ())
4843 desc->count += cs->count.ipa ();
4844 if (!desc->processed_edges->contains (cs)
4845 && cs->caller->clone_of == desc->orig)
4846 desc->unproc_orig_rec_edges++;
4850 /* If caller edge counts of a clone created for a self-recursive arithmetic
4851 jump function must be adjusted because they come from the "seed" clone
4852 for the first value and so have been excessively scaled back as if they were
4853 not recursive calls, adjust them so that the incoming counts of NODE match
4854 its count. NODE is the node or its thunk. */
4856 static void
4857 adjust_clone_incoming_counts (cgraph_node *node,
4858 desc_incoming_count_struct *desc)
4860 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4861 if (cs->caller->thunk)
4863 adjust_clone_incoming_counts (cs->caller, desc);
4864 profile_count sum = profile_count::zero ();
4865 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4866 if (e->count.initialized_p ())
4867 sum += e->count.ipa ();
4868 cs->count = cs->count.combine_with_ipa_count (sum);
4870 else if (!desc->processed_edges->contains (cs)
4871 && cs->caller->clone_of == desc->orig)
4873 cs->count += desc->count;
4874 if (dump_file)
4876 fprintf (dump_file, " Adjusted count of an incoming edge of "
4877 "a clone %s -> %s to ", cs->caller->dump_name (),
4878 cs->callee->dump_name ());
4879 cs->count.dump (dump_file);
4880 fprintf (dump_file, "\n");
4885 /* When ORIG_NODE has been cloned for values which have been generated for a
4886 self-recursive call as a result of arithmetic pass-through
4887 jump functions, adjust its count together with counts of all such clones in
4888 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4890 The function sums the counts of the original node and all its clones that
4891 cannot be attributed to a specific clone because they come from
4892 non-recursive edges. This sum is then evenly divided between the clones and
4893 on top of that each one gets all the counts which can be attributed directly
4894 to it. */
4896 static void
4897 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4898 const vec<cgraph_node *> &self_gen_clones)
4900 profile_count redist_sum = orig_node->count.ipa ();
4901 if (!(redist_sum > profile_count::zero ()))
4902 return;
4904 if (dump_file)
4905 fprintf (dump_file, " Updating profile of self recursive clone "
4906 "series\n");
4908 gather_other_count_struct gocs;
4909 gocs.orig = orig_node;
4910 gocs.other_count = profile_count::zero ();
4912 auto_vec <profile_count, 8> other_edges_count;
4913 for (cgraph_node *n : self_gen_clones)
4915 gocs.other_count = profile_count::zero ();
4916 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4917 &gocs, false);
4918 other_edges_count.safe_push (gocs.other_count);
4919 redist_sum -= gocs.other_count;
4922 hash_set<cgraph_edge *> processed_edges;
4923 unsigned i = 0;
4924 for (cgraph_node *n : self_gen_clones)
4926 profile_count orig_count = n->count;
4927 profile_count new_count
4928 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4929 new_count = lenient_count_portion_handling (new_count, orig_node);
4930 n->count = new_count;
4931 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4932 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4934 cs->count = cs->count.apply_scale (new_count, orig_count);
4935 processed_edges.add (cs);
4937 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4938 cs->count = cs->count.apply_scale (new_count, orig_count);
4940 i++;
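/* E.g. with made-up counts: if redist_sum is 900 after subtracting the counts
   coming from unrelated nodes and SELF_GEN_CLONES has three elements whose
   other_edges_count entries are 0, 30 and 60, the nodes receive counts
   300, 330 and 360 respectively.  */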
4943 /* There are still going to be edges to ORIG_NODE that have one or more
4944 clones coming from another node clone in SELF_GEN_CLONES and which we
4945 scaled by the same amount, which means that the total incoming sum of
4946 counts to ORIG_NODE will be too high; scale such edges back. */
4947 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4949 if (cs->callee->ultimate_alias_target () == orig_node)
4951 unsigned den = 0;
4952 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4953 if (e->callee->ultimate_alias_target () == orig_node
4954 && processed_edges.contains (e))
4955 den++;
4956 if (den > 0)
4957 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4958 if (e->callee->ultimate_alias_target () == orig_node
4959 && processed_edges.contains (e))
4960 e->count /= den;
4964 /* Edges from the seeds of the values generated for arithmetic jump-functions
4965 along self-recursive edges are likely to have fairly low counts and so
4966 edges from them to nodes in the self_gen_clones do not correspond to the
4967 artificially distributed count of the nodes; the total sum of incoming
4968 edges to some clones might therefore be too low. Detect this situation and
4969 correct it. */
4970 for (cgraph_node *n : self_gen_clones)
4972 if (!(n->count.ipa () > profile_count::zero ()))
4973 continue;
4975 desc_incoming_count_struct desc;
4976 desc.orig = orig_node;
4977 desc.processed_edges = &processed_edges;
4978 desc.count = profile_count::zero ();
4979 desc.unproc_orig_rec_edges = 0;
4980 analyze_clone_icoming_counts (n, &desc);
4982 if (n->count.differs_from_p (desc.count))
4984 if (n->count > desc.count
4985 && desc.unproc_orig_rec_edges > 0)
4987 desc.count = n->count - desc.count;
4988 desc.count /= desc.unproc_orig_rec_edges;
4989 adjust_clone_incoming_counts (n, &desc);
4991 else if (dump_file)
4992 fprintf (dump_file,
4993 " Unable to fix up incoming counts for %s.\n",
4994 n->dump_name ());
4998 if (dump_file)
4999 for (cgraph_node *n : self_gen_clones)
5000 dump_profile_updates (n, n != orig_node);
5001 return;
5004 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
5005 their profile information to reflect this. This function should not be used
5006 for clones generated for arithmetic pass-through jump functions on a
5007 self-recursive call graph edge, that situation is handled by
5008 update_counts_for_self_gen_clones. */
5010 static void
5011 update_profiling_info (struct cgraph_node *orig_node,
5012 struct cgraph_node *new_node)
5014 struct caller_statistics stats;
5015 profile_count new_sum;
5016 profile_count remainder, orig_node_count = orig_node->count.ipa ();
5018 if (!(orig_node_count > profile_count::zero ()))
5019 return;
5021 if (dump_file)
5023 fprintf (dump_file, " Updating profile from original count: ");
5024 orig_node_count.dump (dump_file);
5025 fprintf (dump_file, "\n");
5028 init_caller_stats (&stats, new_node);
5029 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
5030 false);
5031 new_sum = stats.count_sum;
5033 bool orig_edges_processed = false;
5034 if (new_sum > orig_node_count)
5036 /* TODO: Profile has already gone astray, keep what we have but lower it
5037 to global0 category. */
5038 remainder = orig_node->count.global0 ();
5040 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5041 cs->count = cs->count.global0 ();
5042 for (cgraph_edge *cs = orig_node->indirect_calls;
5044 cs = cs->next_callee)
5045 cs->count = cs->count.global0 ();
5046 orig_edges_processed = true;
5048 else if (stats.rec_count_sum.nonzero_p ())
5050 int new_nonrec_calls = stats.n_nonrec_calls;
5051 /* There are self-recursive edges which are likely to bring in the
5052 majority of calls but which we must divide between the original and the
5053 new node. */
5054 init_caller_stats (&stats, orig_node);
5055 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
5056 &stats, false);
5057 int orig_nonrec_calls = stats.n_nonrec_calls;
5058 profile_count orig_nonrec_call_count = stats.count_sum;
5060 if (orig_node->local)
5062 if (!orig_nonrec_call_count.nonzero_p ())
5064 if (dump_file)
5065 fprintf (dump_file, " The original is local and the only "
5066 "incoming edges from non-dead callers with nonzero "
5067 "counts are self-recursive, assuming it is cold.\n");
5068 /* The NEW_NODE count and counts of all its outgoing edges
5069 are still unmodified copies of ORIG_NODE's. Just clear
5070 the latter and bail out. */
5071 profile_count zero;
5072 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5073 zero = profile_count::zero ().guessed_local ();
5074 else
5075 zero = profile_count::adjusted_zero ();
5076 orig_node->count = zero;
5077 for (cgraph_edge *cs = orig_node->callees;
5079 cs = cs->next_callee)
5080 cs->count = zero;
5081 for (cgraph_edge *cs = orig_node->indirect_calls;
5083 cs = cs->next_callee)
5084 cs->count = zero;
5085 return;
5088 else
5090 /* Let's behave as if there was another caller that accounts for all
5091 the calls that were either indirect or from other compilation
5092 units. */
5093 orig_nonrec_calls++;
5094 profile_count pretend_caller_count
5095 = (orig_node_count - new_sum - orig_nonrec_call_count
5096 - stats.rec_count_sum);
5097 orig_nonrec_call_count += pretend_caller_count;
5100 /* Divide all "unexplained" counts roughly proportionally to sums of
5101 counts of non-recursive calls.
5103 We put rather arbitrary limits on how many counts we claim because the
5104 number of non-self-recursive incoming counts is only a rough guideline
5105 and there are cases (such as mcf) where using it blindly just takes
5106 too many. And if lattices are considered in the opposite order we
5107 could also take too few. */
5108 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5110 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5111 profile_count new_part
5112 = MAX (MIN (unexp.apply_scale (new_sum,
5113 new_sum + orig_nonrec_call_count),
5114 unexp.apply_scale (limit_den - 1, limit_den)),
5115 unexp.apply_scale (new_nonrec_calls, limit_den));
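/* As a hypothetical example: with unexp == 600, new_sum == 100,
   orig_nonrec_call_count == 200, new_nonrec_calls == 1 and
   orig_nonrec_calls == 2, limit_den is 6 and the three candidates are
   600 * 100 / 300 == 200, 600 * 5 / 6 == 500 and 600 * 1 / 6 == 100,
   so new_part becomes MAX (MIN (200, 500), 100) == 200.  */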
5116 if (dump_file)
5118 fprintf (dump_file, " Claiming ");
5119 new_part.dump (dump_file);
5120 fprintf (dump_file, " of unexplained ");
5121 unexp.dump (dump_file);
5122 fprintf (dump_file, " counts because of self-recursive "
5123 "calls\n");
5125 new_sum += new_part;
5126 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5127 orig_node);
5129 else
5130 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5131 orig_node);
5133 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5134 new_node->count = new_sum;
5135 orig_node->count = remainder;
5137 profile_count orig_new_node_count = orig_node_count;
5138 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5139 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5140 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5141 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5142 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5144 if (!orig_edges_processed)
5146 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5147 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5148 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5149 for (cgraph_edge *cs = orig_node->indirect_calls;
5151 cs = cs->next_callee)
5152 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5155 if (dump_file)
5157 dump_profile_updates (new_node, true);
5158 dump_profile_updates (orig_node, false);
5162 /* Update the respective profile of specialized NEW_NODE and the original
5163 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5164 have been redirected to the specialized version. */
5166 static void
5167 update_specialized_profile (struct cgraph_node *new_node,
5168 struct cgraph_node *orig_node,
5169 profile_count redirected_sum)
5171 struct cgraph_edge *cs;
5172 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
5174 if (dump_file)
5176 fprintf (dump_file, " the sum of counts of redirected edges is ");
5177 redirected_sum.dump (dump_file);
5178 fprintf (dump_file, "\n old ipa count of the original node is ");
5179 orig_node_count.dump (dump_file);
5180 fprintf (dump_file, "\n");
5182 if (!(orig_node_count > profile_count::zero ()))
5183 return;
5185 new_node_count = new_node->count;
5186 new_node->count += redirected_sum;
5187 orig_node->count
5188 = lenient_count_portion_handling (orig_node->count - redirected_sum,
5189 orig_node);
5191 for (cs = new_node->callees; cs; cs = cs->next_callee)
5192 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5194 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5196 profile_count dec = cs->count.apply_scale (redirected_sum,
5197 orig_node_count);
5198 cs->count -= dec;
5201 if (dump_file)
5203 dump_profile_updates (new_node, true);
5204 dump_profile_updates (orig_node, false);
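/* For example, with hypothetical counts: if the specialized node had count
   400 before the update and edges with a cumulative count of 100 are newly
   redirected to it, its count grows to 500 and the counts of its outgoing
   edges are scaled up by 100/400, i.e. by 25%, while the original node's
   outgoing edges lose the proportional amount.  */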
5208 static void adjust_references_in_caller (cgraph_edge *cs,
5209 symtab_node *symbol, int index);
5211 /* Simple structure to pass a symbol and index (with same meaning as parameters
5212 of adjust_references_in_caller) through a void* parameter of a
5213 call_for_symbol_thunks_and_aliases callback. */
5214 struct symbol_and_index_together
5216 symtab_node *symbol;
5217 int index;
5220 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5221 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5222 static bool
5223 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5225 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5226 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5227 if (!cs->caller->thunk)
5228 adjust_references_in_caller (cs, pack->symbol, pack->index);
5229 return false;
5232 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5233 variable which is only dereferenced and which is represented by SYMBOL. See
5234 if we can remove the ADDR reference in callers associated with the call. */
5236 static void
5237 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5239 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5240 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5241 if (jfunc->type == IPA_JF_CONST)
5243 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5244 cs->lto_stmt_uid,
5245 IPA_REF_ADDR);
5246 if (!to_del)
5247 return;
5248 to_del->remove_reference ();
5249 ipa_zap_jf_refdesc (jfunc);
5250 if (dump_file)
5251 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5252 cs->caller->dump_name (), symbol->dump_name ());
5253 return;
5256 if (jfunc->type != IPA_JF_PASS_THROUGH
5257 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
5258 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
5259 return;
5261 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5262 cgraph_node *caller = cs->caller;
5263 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5264 /* TODO: This consistency check may be too big and not really
5265 that useful. Consider removing it. */
5266 tree cst;
5267 if (caller_info->ipcp_orig_node)
5268 cst = caller_info->known_csts[fidx];
5269 else
5271 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5272 gcc_assert (lat->is_single_const ());
5273 cst = lat->values->value;
5275 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5276 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5277 == symbol));
5279 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5280 if (cuses == IPA_UNDESCRIBED_USE)
5281 return;
5282 gcc_assert (cuses > 0);
5283 cuses--;
5284 ipa_set_controlled_uses (caller_info, fidx, cuses);
5285 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5286 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5288 "to %i.\n", fidx, caller->dump_name (), cuses);
5289 if (cuses)
5290 return;
5292 if (caller_info->ipcp_orig_node)
5294 /* Cloning machinery has created a reference here, we need to either
5295 remove it or change it to a read one. */
5296 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5297 if (to_del)
5299 to_del->remove_reference ();
5300 if (dump_file)
5301 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5302 cs->caller->dump_name (), symbol->dump_name ());
5303 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5305 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5306 if (dump_file)
5307 fprintf (dump_file,
5308 " ...and replaced it with LOAD one.\n");
5313 symbol_and_index_together pack;
5314 pack.symbol = symbol;
5315 pack.index = fidx;
5316 if (caller->can_change_signature)
5317 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5318 &pack, true);
5322 /* Return true if we would like to remove a parameter from NODE when cloning it
5323 with KNOWN_CSTS scalar constants. */
5325 static bool
5326 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5328 auto_vec<bool, 16> surviving;
5329 bool filled_vec = false;
5330 ipa_node_params *info = ipa_node_params_sum->get (node);
5331 int i, count = ipa_get_param_count (info);
5333 for (i = 0; i < count; i++)
5335 if (!known_csts[i] && ipa_is_param_used (info, i))
5336 continue;
5338 if (!filled_vec)
5340 clone_info *info = clone_info::get (node);
5341 if (!info || !info->param_adjustments)
5342 return true;
5343 info->param_adjustments->get_surviving_params (&surviving);
5344 filled_vec = true;
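/* Guard the access below so that SURVIVING is only indexed when I falls
   within its bounds.  */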
5346 if (surviving.length () > (unsigned) i && surviving[i])
5347 return true;
5349 return false;
5352 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5353 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5354 redirect all edges in CALLERS to it. */
5356 static struct cgraph_node *
5357 create_specialized_node (struct cgraph_node *node,
5358 vec<tree> known_csts,
5359 vec<ipa_polymorphic_call_context> known_contexts,
5360 vec<ipa_argagg_value, va_gc> *aggvals,
5361 vec<cgraph_edge *> &callers)
5363 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5364 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5365 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5366 struct cgraph_node *new_node;
5367 int i, count = ipa_get_param_count (info);
5368 clone_info *cinfo = clone_info::get (node);
5369 ipa_param_adjustments *old_adjustments = cinfo
5370 ? cinfo->param_adjustments : NULL;
5371 ipa_param_adjustments *new_adjustments;
5372 gcc_assert (!info->ipcp_orig_node);
5373 gcc_assert (node->can_change_signature
5374 || !old_adjustments);
5376 if (old_adjustments)
5378 /* At the moment all IPA optimizations should use the number of
5379 parameters of the prevailing decl as the m_always_copy_start.
5380 Handling any other value would complicate the code below, so for the
5381 time being let's only assert it is so. */
5382 gcc_assert (old_adjustments->m_always_copy_start == count
5383 || old_adjustments->m_always_copy_start < 0);
5384 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5385 for (i = 0; i < old_adj_count; i++)
5387 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5388 if (!node->can_change_signature
5389 || old_adj->op != IPA_PARAM_OP_COPY
5390 || (!known_csts[old_adj->base_index]
5391 && ipa_is_param_used (info, old_adj->base_index)))
5393 ipa_adjusted_param new_adj = *old_adj;
5395 new_adj.prev_clone_adjustment = true;
5396 new_adj.prev_clone_index = i;
5397 vec_safe_push (new_params, new_adj);
5400 bool skip_return = old_adjustments->m_skip_return;
5401 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5402 ipa_param_adjustments (new_params, count,
5403 skip_return));
5405 else if (node->can_change_signature
5406 && want_remove_some_param_p (node, known_csts))
5408 ipa_adjusted_param adj;
5409 memset (&adj, 0, sizeof (adj));
5410 adj.op = IPA_PARAM_OP_COPY;
5411 for (i = 0; i < count; i++)
5412 if (!known_csts[i] && ipa_is_param_used (info, i))
5414 adj.base_index = i;
5415 adj.prev_clone_index = i;
5416 vec_safe_push (new_params, adj);
5418 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5419 ipa_param_adjustments (new_params, count, false));
5421 else
5422 new_adjustments = NULL;
5424 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5425 for (i = callers.length () - 1; i >= 0; i--)
5427 cgraph_edge *cs = callers[i];
5428 if (cs->caller == node)
5430 self_recursive_calls.safe_push (cs);
5431 callers.unordered_remove (i);
5434 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5435 for (i = 0; i < count; i++)
5437 tree t = known_csts[i];
5438 if (!t)
5439 continue;
5441 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5443 bool load_ref = false;
5444 symtab_node *ref_symbol;
5445 if (TREE_CODE (t) == ADDR_EXPR)
5447 tree base = get_base_address (TREE_OPERAND (t, 0));
5448 if (TREE_CODE (base) == VAR_DECL
5449 && ipa_get_controlled_uses (info, i) == 0
5450 && ipa_get_param_load_dereferenced (info, i)
5451 && (ref_symbol = symtab_node::get (base)))
5453 load_ref = true;
5454 if (node->can_change_signature)
5455 for (cgraph_edge *caller : callers)
5456 adjust_references_in_caller (caller, ref_symbol, i);
5460 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5461 if (replace_map)
5462 vec_safe_push (replace_trees, replace_map);
5465 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5466 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5467 node->decl)));
5468 new_node = node->create_virtual_clone (callers, replace_trees,
5469 new_adjustments, "constprop",
5470 suffix_counter);
5471 suffix_counter++;
5473 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5474 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5476 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5477 /* Cloned edges can disappear during cloning as speculation can be
5478 resolved; check that we have one and that it comes from the last
5479 cloning. */
5480 if (cs && cs->caller == new_node)
5481 cs->redirect_callee_duplicating_thunks (new_node);
5482 /* Any future code that would make more than one clone of an outgoing
5483 edge would confuse this mechanism, so let's check that this does not
5484 happen. */
5485 gcc_checking_assert (!cs
5486 || !get_next_cgraph_edge_clone (cs)
5487 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5489 if (have_self_recursive_calls)
5490 new_node->expand_all_artificial_thunks ();
5492 ipa_set_node_agg_value_chain (new_node, aggvals);
5493 for (const ipa_argagg_value &av : aggvals)
5494 new_node->maybe_create_reference (av.value, NULL);
5496 if (dump_file && (dump_flags & TDF_DETAILS))
5498 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5499 if (known_contexts.exists ())
5501 for (i = 0; i < count; i++)
5502 if (!known_contexts[i].useless_p ())
5504 fprintf (dump_file, " known ctx %i is ", i);
5505 known_contexts[i].dump (dump_file);
5508 if (aggvals)
5510 fprintf (dump_file, " Aggregate replacements:");
5511 ipa_argagg_value_list avs (aggvals);
5512 avs.dump (dump_file);
5516 new_info = ipa_node_params_sum->get (new_node);
5517 new_info->ipcp_orig_node = node;
5518 new_node->ipcp_clone = true;
5519 new_info->known_csts = known_csts;
5520 new_info->known_contexts = known_contexts;
5522 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5523 aggvals);
5525 return new_node;
5528 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5529 pass-through function to itself when the cgraph_node involved is not an
5530 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5531 no-operation pass-through. */
5533 static bool
5534 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5535 bool simple = true)
5537 enum availability availability;
5538 if (cs->caller == cs->callee->function_symbol (&availability)
5539 && availability > AVAIL_INTERPOSABLE
5540 && jfunc->type == IPA_JF_PASS_THROUGH
5541 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5542 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5543 && ipa_node_params_sum->get (cs->caller)
5544 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5545 return true;
5546 return false;
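/* For instance, in a hypothetical

     fn (int i)
     {
       ...
       fn (i);
     }

   the jump function of the recursive call site is a simple no-operation
   pass-through of parameter 0 and, as long as fn is not itself an IPA-CP
   clone, this predicate returns true for it.  */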
5549 /* Return true if JFUNC, which describes a part of an aggregate represented or
5550 pointed to by the i-th parameter of call CS, is a pass-through function to
5551 itself when the cgraph_node involved is not an IPA-CP clone. When
5552 SIMPLE is true, further check if JFUNC is a simple no-operation
5553 pass-through. */
5555 static bool
5556 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5557 const ipa_agg_jf_item *jfunc,
5558 int i, bool simple = true)
5560 enum availability availability;
5561 if (cs->caller == cs->callee->function_symbol (&availability)
5562 && availability > AVAIL_INTERPOSABLE
5563 && jfunc->jftype == IPA_JF_LOAD_AGG
5564 && jfunc->offset == jfunc->value.load_agg.offset
5565 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5566 && jfunc->value.pass_through.formal_id == i
5567 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5568 && ipa_node_params_sum->get (cs->caller)
5569 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5570 return true;
5571 return false;
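/* Analogously, in the following hypothetical input function (again shown
   only for illustration):

     void
     bar (int *p)
     {
       int l = *p;
       bar (&l);
     }

   the part of the aggregate pointed to by the argument of the recursive call
   is described by an IPA_JF_LOAD_AGG jump function which loads the value
   from the same offset of the aggregate pointed to by formal parameter 0,
   i.e. it passes that part through to itself unchanged.  */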
5574 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5575 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
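/* For example, if no context-independent constant was discovered for
   parameter 0, so that KNOWN_CSTS[0] is NULL, but every edge in CALLERS
   passes the constant 4 in that position, the slot is filled with 4 and the
   clone created for this particular subset of callers can take advantage of
   it.  */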
5577 static void
5578 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5579 vec<tree> &known_csts,
5580 const vec<cgraph_edge *> &callers)
5582 ipa_node_params *info = ipa_node_params_sum->get (node);
5583 int i, count = ipa_get_param_count (info);
5585 for (i = 0; i < count; i++)
5587 struct cgraph_edge *cs;
5588 tree newval = NULL_TREE;
5589 int j;
5590 bool first = true;
5591 tree type = ipa_get_type (info, i);
5593 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5594 continue;
5596 FOR_EACH_VEC_ELT (callers, j, cs)
5598 struct ipa_jump_func *jump_func;
5599 tree t;
5601 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5602 if (!args
5603 || i >= ipa_get_cs_argument_count (args)
5604 || (i == 0
5605 && call_passes_through_thunk (cs)))
5607 newval = NULL_TREE;
5608 break;
5610 jump_func = ipa_get_ith_jump_func (args, i);
5612 /* Besides a simple pass-through jump function, an arithmetic jump
5613 function could also introduce an argument-direct-pass-through for a
5614 self-feeding recursive call. For example,
5616 fn (int i)
5617 {
5618 fn (i & 1);
5619 }
5621 Given that i is 0, recursive propagation via (i & 1) also gets
5622 0. */
5623 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5625 gcc_assert (newval);
5626 t = ipa_get_jf_arith_result (
5627 ipa_get_jf_pass_through_operation (jump_func),
5628 newval,
5629 ipa_get_jf_pass_through_operand (jump_func),
5630 type);
5632 else
5633 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5634 jump_func, type);
5635 if (!t
5636 || (newval
5637 && !values_equal_for_ipcp_p (t, newval))
5638 || (!first && !newval))
5640 newval = NULL_TREE;
5641 break;
5643 else
5644 newval = t;
5645 first = false;
5648 if (newval)
5650 if (dump_file && (dump_flags & TDF_DETAILS))
5652 fprintf (dump_file, " adding an extra known scalar value ");
5653 print_ipcp_constant_value (dump_file, newval);
5654 fprintf (dump_file, " for ");
5655 ipa_dump_param (dump_file, info, i);
5656 fprintf (dump_file, "\n");
5659 known_csts[i] = newval;
5664 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5665 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5666 CALLERS. */
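/* The candidate context starts out as the context supplied by the first
   caller and is conservatively combined, using meet_with, with the context
   coming from each subsequent caller; as soon as it degrades into a useless
   context, the search for the given parameter is abandoned.  */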
5668 static void
5669 find_more_contexts_for_caller_subset (cgraph_node *node,
5670 vec<ipa_polymorphic_call_context>
5671 *known_contexts,
5672 const vec<cgraph_edge *> &callers)
5674 ipa_node_params *info = ipa_node_params_sum->get (node);
5675 int i, count = ipa_get_param_count (info);
5677 for (i = 0; i < count; i++)
5679 cgraph_edge *cs;
5681 if (ipa_get_poly_ctx_lat (info, i)->bottom
5682 || (known_contexts->exists ()
5683 && !(*known_contexts)[i].useless_p ()))
5684 continue;
5686 ipa_polymorphic_call_context newval;
5687 bool first = true;
5688 int j;
5690 FOR_EACH_VEC_ELT (callers, j, cs)
5692 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5693 if (!args
5694 || i >= ipa_get_cs_argument_count (args))
5695 return;
5696 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5697 ipa_polymorphic_call_context ctx;
5698 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5699 cs, i, jfunc);
5700 if (first)
5702 newval = ctx;
5703 first = false;
5705 else
5706 newval.meet_with (ctx);
5707 if (newval.useless_p ())
5708 break;
5711 if (!newval.useless_p ())
5713 if (dump_file && (dump_flags & TDF_DETAILS))
5715 fprintf (dump_file, " adding an extra known polymorphic "
5716 "context ");
5717 print_ipcp_constant_value (dump_file, newval);
5718 fprintf (dump_file, " for ");
5719 ipa_dump_param (dump_file, info, i);
5720 fprintf (dump_file, "\n");
5723 if (!known_contexts->exists ())
5724 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5725 true);
5726 (*known_contexts)[i] = newval;
5732 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5733 RES. If INTERIM is non-NULL, it contains the current interim state of
5734 collected aggregate values which can be used to compute values passed over
5735 self-recursive edges.
5737 This is basically one iteration of push_agg_values_from_edge over one
5738 parameter, which allows for simpler early returns. */
5740 static void
5741 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5742 vec<ipa_argagg_value> *res,
5743 const ipa_argagg_value_list *interim)
5745 bool agg_values_from_caller = false;
5746 bool agg_jf_preserved = false;
5747 unsigned unit_delta = UINT_MAX;
5748 int src_idx = -1;
5749 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5750 index);
5752 if (jfunc->type == IPA_JF_PASS_THROUGH
5753 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5755 agg_values_from_caller = true;
5756 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5757 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5758 unit_delta = 0;
5760 else if (jfunc->type == IPA_JF_ANCESTOR
5761 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5763 agg_values_from_caller = true;
5764 agg_jf_preserved = true;
5765 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5766 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5769 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5770 if (agg_values_from_caller)
5772 if (caller_info->ipcp_orig_node)
5774 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5775 ipcp_transformation *ts
5776 = ipcp_get_transformation_summary (cs->caller);
5777 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5778 ipcp_param_lattices *orig_plats
5779 = ipa_get_parm_lattices (orig_info, src_idx);
5780 if (ts
5781 && orig_plats->aggs
5782 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5784 ipa_argagg_value_list src (ts);
5785 src.push_adjusted_values (src_idx, index, unit_delta, res);
5786 return;
5789 else
5791 ipcp_param_lattices *src_plats
5792 = ipa_get_parm_lattices (caller_info, src_idx);
5793 if (src_plats->aggs
5794 && !src_plats->aggs_bottom
5795 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5797 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5799 interim->push_adjusted_values (src_idx, index, unit_delta,
5800 res);
5801 return;
5803 if (!src_plats->aggs_contain_variable)
5805 push_agg_values_from_plats (src_plats, index, unit_delta,
5806 res);
5807 return;
5813 if (!jfunc->agg.items)
5814 return;
5815 bool first = true;
5816 unsigned prev_unit_offset = 0;
5817 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5819 tree value, srcvalue;
5820 /* Besides a simple pass-through aggregate jump function, an arithmetic
5821 aggregate jump function could also bring the same aggregate value as the
5822 parameter passed in for a self-feeding recursive call. For example,
5824 fn (int *i)
5825 {
5826 int j = *i & 1;
5827 fn (&j);
5828 }
5830 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5831 if (interim
5832 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5833 && (srcvalue = interim->get_value(index,
5834 agg_jf.offset / BITS_PER_UNIT)))
5835 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5836 srcvalue,
5837 agg_jf.value.pass_through.operand,
5838 agg_jf.type);
5839 else
5840 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5841 &agg_jf);
5842 if (value)
5844 struct ipa_argagg_value iav;
5845 iav.value = value;
5846 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5847 iav.index = index;
5848 iav.by_ref = jfunc->agg.by_ref;
5850 gcc_assert (first
5851 || iav.unit_offset > prev_unit_offset);
5852 prev_unit_offset = iav.unit_offset;
5853 first = false;
5855 res->safe_push (iav);
5858 return;
5861 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5862 description of the ultimate callee of CS or the one it was cloned from (the
5863 summary where lattices are). If INTERIM is non-NULL, it contains the
5864 current interim state of collected aggregate values which can be used to
5865 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5866 is true) and to skip values which clearly will not be part of the
5867 intersection with INTERIM. */
5869 static void
5870 push_agg_values_from_edge (struct cgraph_edge *cs,
5871 ipa_node_params *dest_info,
5872 vec<ipa_argagg_value> *res,
5873 const ipa_argagg_value_list *interim,
5874 bool optimize_self_recursion)
5876 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5877 if (!args)
5878 return;
5880 int count = MIN (ipa_get_param_count (dest_info),
5881 ipa_get_cs_argument_count (args));
5883 unsigned interim_index = 0;
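/* Parameters for which INTERIM holds no value cannot survive the later
   intersection with INTERIM anyway, so the loop below skips them early.  */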
5884 for (int index = 0; index < count; index++)
5886 if (interim)
5888 while (interim_index < interim->m_elts.size ()
5889 && interim->m_elts[interim_index].value
5890 && interim->m_elts[interim_index].index < index)
5891 interim_index++;
5892 if (interim_index >= interim->m_elts.size ()
5893 || interim->m_elts[interim_index].index > index)
5894 continue;
5897 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5898 if (!ipa_is_param_used (dest_info, index)
5899 || plats->aggs_bottom)
5900 continue;
5901 push_agg_values_for_index_from_edge (cs, index, res,
5902 optimize_self_recursion ? interim
5903 : NULL);
5908 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5909 from all of them. Return nullptr if there are none. */
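/* As a hypothetical example, if one caller passes an aggregate containing
   value 1 at unit offset 0 and value 2 at unit offset 4 while another caller
   passes value 1 at unit offset 0 and value 3 at unit offset 4, only the
   value 1 at unit offset 0 survives the intersection and is returned.  */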
5911 static struct vec<ipa_argagg_value, va_gc> *
5912 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5913 const vec<cgraph_edge *> &callers)
5915 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5916 if (dest_info->ipcp_orig_node)
5917 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5919 /* gather_edges_for_value puts a non-recursive call into the first element of
5920 callers if it can. */
5921 auto_vec<ipa_argagg_value, 32> interim;
5922 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5924 unsigned valid_entries = interim.length ();
5925 if (!valid_entries)
5926 return nullptr;
5928 unsigned caller_count = callers.length();
5929 for (unsigned i = 1; i < caller_count; i++)
5931 auto_vec<ipa_argagg_value, 32> last;
5932 ipa_argagg_value_list avs (&interim);
5933 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5935 valid_entries = intersect_argaggs_with (interim, last);
5936 if (!valid_entries)
5937 return nullptr;
5940 vec<ipa_argagg_value, va_gc> *res = NULL;
5941 vec_safe_reserve_exact (res, valid_entries);
5942 for (const ipa_argagg_value &av : interim)
5943 if (av.value)
5944 res->quick_push(av);
5945 gcc_checking_assert (res->length () == valid_entries);
5946 return res;
5949 /* Determine whether CS also brings all scalar values that NODE is
5950 specialized for. */
5952 static bool
5953 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5954 struct cgraph_node *node)
5956 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5957 int count = ipa_get_param_count (dest_info);
5958 class ipa_node_params *caller_info;
5959 class ipa_edge_args *args;
5960 int i;
5962 caller_info = ipa_node_params_sum->get (cs->caller);
5963 args = ipa_edge_args_sum->get (cs);
5964 for (i = 0; i < count; i++)
5966 struct ipa_jump_func *jump_func;
5967 tree val, t;
5969 val = dest_info->known_csts[i];
5970 if (!val)
5971 continue;
5973 if (i >= ipa_get_cs_argument_count (args))
5974 return false;
5975 jump_func = ipa_get_ith_jump_func (args, i);
5976 t = ipa_value_from_jfunc (caller_info, jump_func,
5977 ipa_get_type (dest_info, i));
5978 if (!t || !values_equal_for_ipcp_p (val, t))
5979 return false;
5981 return true;
5984 /* Determine whether CS also brings all aggregate values that NODE is
5985 specialized for. */
5987 static bool
5988 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5989 struct cgraph_node *node)
5991 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5992 if (!ts || vec_safe_is_empty (ts->m_agg_values))
5993 return true;
5995 const ipa_argagg_value_list existing (ts->m_agg_values);
5996 auto_vec<ipa_argagg_value, 32> edge_values;
5997 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5998 gcc_checking_assert (dest_info->ipcp_orig_node);
5999 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
6000 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
6001 const ipa_argagg_value_list avl (&edge_values);
6002 return avl.superset_of_p (existing);
6005 /* Given an original NODE and a VAL for which we have already created a
6006 specialized clone, check whether there are incoming edges that still lead
6007 into the old node but now also bring the requested value and conform to all
6008 other criteria such that they can be redirected to the specialized node.
6009 This function can therefore redirect the final edge in an SCC. */
6011 template <typename valtype>
6012 static void
6013 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
6015 ipcp_value_source<valtype> *src;
6016 profile_count redirected_sum = profile_count::zero ();
6018 for (src = val->sources; src; src = src->next)
6020 struct cgraph_edge *cs = src->cs;
6021 while (cs)
6023 if (cgraph_edge_brings_value_p (cs, src, node, val)
6024 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
6025 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
6027 if (dump_file)
6028 fprintf (dump_file, " - adding an extra caller %s of %s\n",
6029 cs->caller->dump_name (),
6030 val->spec_node->dump_name ());
6032 cs->redirect_callee_duplicating_thunks (val->spec_node);
6033 val->spec_node->expand_all_artificial_thunks ();
6034 if (cs->count.ipa ().initialized_p ())
6035 redirected_sum = redirected_sum + cs->count.ipa ();
6037 cs = get_next_cgraph_edge_clone (cs);
6041 if (redirected_sum.nonzero_p ())
6042 update_specialized_profile (val->spec_node, node, redirected_sum);
6045 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
6047 static bool
6048 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
6050 ipa_polymorphic_call_context *ctx;
6051 int i;
6053 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
6054 if (!ctx->useless_p ())
6055 return true;
6056 return false;
6059 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful
6060 context, otherwise return vNULL. */
6061 static vec<ipa_polymorphic_call_context>
6062 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
6064 if (known_contexts_useful_p (known_contexts))
6065 return known_contexts.copy ();
6066 else
6067 return vNULL;
6070 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6071 according to VAL and INDEX. If the known contexts in AVALS contain any
6072 useful ones, store a copy of them in KNOWN_CONTEXTS too. */
6074 static void
6075 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6076 vec<tree> *known_csts,
6077 vec<ipa_polymorphic_call_context> *known_contexts,
6078 ipcp_value<tree> *val, int index)
6080 *known_csts = avals->m_known_vals.copy ();
6081 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6082 (*known_csts)[index] = val->value;
6085 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6086 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6087 INDEX. */
6089 static void
6090 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6091 vec<tree> *known_csts,
6092 vec<ipa_polymorphic_call_context> *known_contexts,
6093 ipcp_value<ipa_polymorphic_call_context> *val,
6094 int index)
6096 *known_csts = avals->m_known_vals.copy ();
6097 *known_contexts = avals->m_known_contexts.copy ();
6098 (*known_contexts)[index] = val->value;
6101 /* Return true if OFFSET indicates this was not an aggregate value or there is
6102 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6103 AGGVALS list. */
6105 DEBUG_FUNCTION bool
6106 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6107 int index, HOST_WIDE_INT offset, tree value)
6109 if (offset == -1)
6110 return true;
6112 const ipa_argagg_value_list avl (aggvals);
6113 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6114 return v && values_equal_for_ipcp_p (v, value);
6117 /* Return true iff OFFSET is minus one; the source of a polymorphic context
6118 can never be an aggregate value. */
6120 DEBUG_FUNCTION bool
6121 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6122 int , HOST_WIDE_INT offset,
6123 ipa_polymorphic_call_context)
6125 return offset == -1;
6128 /* Decide whether to create a special version of NODE for value VAL of
6129 parameter at the given INDEX. If OFFSET is -1, the value is for the
6130 parameter itself, otherwise it is stored at the given OFFSET of the
6131 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6132 is a vector which contains clones created for self-recursive calls with an
6133 arithmetic pass-through jump function. */
6135 template <typename valtype>
6136 static bool
6137 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6138 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6139 vec<cgraph_node *> *self_gen_clones)
6141 int caller_count;
6142 sreal freq_sum;
6143 profile_count count_sum, rec_count_sum;
6144 vec<cgraph_edge *> callers;
6146 if (val->spec_node)
6148 perhaps_add_new_callers (node, val);
6149 return false;
6151 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6153 if (dump_file && (dump_flags & TDF_DETAILS))
6154 fprintf (dump_file, " Ignoring candidate value because "
6155 "maximum unit size would be reached with %li.\n",
6156 val->local_size_cost + overall_size);
6157 return false;
6159 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6160 &rec_count_sum, &count_sum))
6161 return false;
6163 if (!dbg_cnt (ipa_cp_values))
6164 return false;
6166 if (val->self_recursion_generated_p ())
6168 /* The edge counts in this case might not have been adjusted yet.
6169 Nevertheless, even if they had been, the adjustment would only have been
6170 guesswork, which we can just as well do now. The recursive part of the
6171 counts can be derived from the count of the original node anyway. */
6172 if (node->count.ipa ().nonzero_p ())
6174 unsigned dem = self_gen_clones->length () + 1;
6175 rec_count_sum = node->count.ipa () / dem;
6177 else
6178 rec_count_sum = profile_count::zero ();
6181 /* get_info_about_necessary_edges only sums up ipa counts. */
6182 count_sum += rec_count_sum;
6184 if (dump_file && (dump_flags & TDF_DETAILS))
6186 fprintf (dump_file, " - considering value ");
6187 print_ipcp_constant_value (dump_file, val->value);
6188 fprintf (dump_file, " for ");
6189 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6190 if (offset != -1)
6191 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6192 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6195 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6196 freq_sum, count_sum,
6197 val->local_size_cost)
6198 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6199 freq_sum, count_sum, val->prop_size_cost))
6200 return false;
6202 if (dump_file)
6203 fprintf (dump_file, " Creating a specialized node of %s.\n",
6204 node->dump_name ());
6206 vec<tree> known_csts;
6207 vec<ipa_polymorphic_call_context> known_contexts;
6209 callers = gather_edges_for_value (val, node, caller_count);
6210 if (offset == -1)
6211 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6212 else
6214 known_csts = avals->m_known_vals.copy ();
6215 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6217 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6218 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6219 vec<ipa_argagg_value, va_gc> *aggvals
6220 = find_aggregate_values_for_callers_subset (node, callers);
6221 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6222 offset, val->value));
6223 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6224 aggvals, callers);
6226 if (val->self_recursion_generated_p ())
6227 self_gen_clones->safe_push (val->spec_node);
6228 else
6229 update_profiling_info (node, val->spec_node);
6231 callers.release ();
6232 overall_size += val->local_size_cost;
6233 if (dump_file && (dump_flags & TDF_DETAILS))
6234 fprintf (dump_file, " overall size reached %li\n",
6235 overall_size);
6237 /* TODO: If for some lattice there is only one other known value
6238 left, make a special node for it too. */
6240 return true;
6243 /* Like irange::contains_p(), but convert VAL to the type of R if
6244 necessary. */
6246 static inline bool
6247 ipa_range_contains_p (const vrange &r, tree val)
6249 if (r.undefined_p ())
6250 return false;
6252 tree type = r.type ();
6253 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6254 return false;
6256 val = fold_convert (type, val);
6257 return r.contains_p (val);
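/* For instance, if R describes a range of a 16-bit type while VAL is an
   INTEGER_CST of a 32-bit type, VAL is first checked to fit into the 16-bit
   type and only then folded to it, so that contains_p is never asked to
   compare values of mismatched types.  */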
6260 /* Decide whether and what specialized clones of NODE should be created. */
6262 static bool
6263 decide_whether_version_node (struct cgraph_node *node)
6265 ipa_node_params *info = ipa_node_params_sum->get (node);
6266 int i, count = ipa_get_param_count (info);
6267 bool ret = false;
6269 if (count == 0)
6270 return false;
6272 if (dump_file && (dump_flags & TDF_DETAILS))
6273 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6274 node->dump_name ());
6276 auto_vec <cgraph_node *, 9> self_gen_clones;
6277 ipa_auto_call_arg_values avals;
6278 gather_context_independent_values (info, &avals, false, NULL);
6280 for (i = 0; i < count; i++)
6282 if (!ipa_is_param_used (info, i))
6283 continue;
6285 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6286 ipcp_lattice<tree> *lat = &plats->itself;
6287 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6289 if (!lat->bottom
6290 && !avals.m_known_vals[i])
6292 ipcp_value<tree> *val;
6293 for (val = lat->values; val; val = val->next)
6295 /* If some values generated for self-recursive calls with
6296 arithmetic jump functions fall outside of the known
6297 range for the parameter, we can skip them. */
6298 if (TREE_CODE (val->value) == INTEGER_CST
6299 && !plats->m_value_range.bottom_p ()
6300 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6301 val->value))
6303 /* This can also happen if a constant present in the source
6304 code falls outside of the range of the parameter's type, so we
6305 cannot assert. */
6306 if (dump_file && (dump_flags & TDF_DETAILS))
6308 fprintf (dump_file, " - skipping%s value ",
6309 val->self_recursion_generated_p ()
6310 ? " self_recursion_generated" : "");
6311 print_ipcp_constant_value (dump_file, val->value);
6312 fprintf (dump_file, " because it is outside known "
6313 "value range.\n");
6315 continue;
6317 ret |= decide_about_value (node, i, -1, val, &avals,
6318 &self_gen_clones);
6322 if (!plats->aggs_bottom)
6324 struct ipcp_agg_lattice *aglat;
6325 ipcp_value<tree> *val;
6326 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6327 if (!aglat->bottom && aglat->values
6328 /* If the following is false, the one value has been considered
6329 for cloning for all contexts. */
6330 && (plats->aggs_contain_variable
6331 || !aglat->is_single_const ()))
6332 for (val = aglat->values; val; val = val->next)
6333 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6334 &self_gen_clones);
6337 if (!ctxlat->bottom
6338 && avals.m_known_contexts[i].useless_p ())
6340 ipcp_value<ipa_polymorphic_call_context> *val;
6341 for (val = ctxlat->values; val; val = val->next)
6342 ret |= decide_about_value (node, i, -1, val, &avals,
6343 &self_gen_clones);
6347 if (!self_gen_clones.is_empty ())
6349 self_gen_clones.safe_push (node);
6350 update_counts_for_self_gen_clones (node, self_gen_clones);
6353 if (info->do_clone_for_all_contexts)
6355 if (!dbg_cnt (ipa_cp_values))
6357 info->do_clone_for_all_contexts = false;
6358 return ret;
6361 struct cgraph_node *clone;
6362 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6364 for (int i = callers.length () - 1; i >= 0; i--)
6366 cgraph_edge *cs = callers[i];
6367 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6369 if (caller_info && caller_info->node_dead)
6370 callers.unordered_remove (i);
6373 if (!adjust_callers_for_value_intersection (callers, node))
6375 /* If the node is not called by anyone, or all its caller edges are
6376 self-recursive, the node is not really in use; there is no need to do
6377 any cloning. */
6378 info->do_clone_for_all_contexts = false;
6379 return ret;
6382 if (dump_file)
6383 fprintf (dump_file, " - Creating a specialized node of %s "
6384 "for all known contexts.\n", node->dump_name ());
6386 vec<tree> known_csts = avals.m_known_vals.copy ();
6387 vec<ipa_polymorphic_call_context> known_contexts
6388 = copy_useful_known_contexts (avals.m_known_contexts);
6389 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6390 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6391 vec<ipa_argagg_value, va_gc> *aggvals
6392 = find_aggregate_values_for_callers_subset (node, callers);
6394 if (!known_contexts_useful_p (known_contexts))
6396 known_contexts.release ();
6397 known_contexts = vNULL;
6399 clone = create_specialized_node (node, known_csts, known_contexts,
6400 aggvals, callers);
6401 info->do_clone_for_all_contexts = false;
6402 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6403 ret = true;
6406 return ret;
6409 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6411 static void
6412 spread_undeadness (struct cgraph_node *node)
6414 struct cgraph_edge *cs;
6416 for (cs = node->callees; cs; cs = cs->next_callee)
6417 if (ipa_edge_within_scc (cs))
6419 struct cgraph_node *callee;
6420 class ipa_node_params *info;
6422 callee = cs->callee->function_symbol (NULL);
6423 info = ipa_node_params_sum->get (callee);
6425 if (info && info->node_dead)
6427 info->node_dead = 0;
6428 spread_undeadness (callee);
6433 /* Return true if NODE has a caller from outside of its SCC that is not
6434 dead. Worker callback for cgraph_for_node_and_aliases. */
6436 static bool
6437 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6438 void *data ATTRIBUTE_UNUSED)
6440 struct cgraph_edge *cs;
6442 for (cs = node->callers; cs; cs = cs->next_caller)
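/* A caller which is a thunk is examined by recursively applying this
   predicate to it, which in effect inspects the callers of the thunk
   itself.  */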
6443 if (cs->caller->thunk
6444 && cs->caller->call_for_symbol_thunks_and_aliases
6445 (has_undead_caller_from_outside_scc_p, NULL, true))
6446 return true;
6447 else if (!ipa_edge_within_scc (cs))
6449 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6450 if (!caller_info /* Unoptimized callers are like dead ones. */
6451 || !caller_info->node_dead)
6452 return true;
6454 return false;
6458 /* Identify nodes within the same SCC as NODE which are no longer needed
6459 because of new clones and will be removed as unreachable. */
6461 static void
6462 identify_dead_nodes (struct cgraph_node *node)
6464 struct cgraph_node *v;
6465 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6466 if (v->local)
6468 ipa_node_params *info = ipa_node_params_sum->get (v);
6469 if (info
6470 && !v->call_for_symbol_thunks_and_aliases
6471 (has_undead_caller_from_outside_scc_p, NULL, true))
6472 info->node_dead = 1;
6475 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6477 ipa_node_params *info = ipa_node_params_sum->get (v);
6478 if (info && !info->node_dead)
6479 spread_undeadness (v);
6482 if (dump_file && (dump_flags & TDF_DETAILS))
6484 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6485 if (ipa_node_params_sum->get (v)
6486 && ipa_node_params_sum->get (v)->node_dead)
6487 fprintf (dump_file, " Marking node as dead: %s.\n",
6488 v->dump_name ());
6492 /* The decision stage. Iterate over the topological order of call graph nodes
6493 TOPO and make specialized clones if deemed beneficial. */
6495 static void
6496 ipcp_decision_stage (class ipa_topo_info *topo)
6498 int i;
6500 if (dump_file)
6501 fprintf (dump_file, "\nIPA decision stage:\n\n");
6503 for (i = topo->nnodes - 1; i >= 0; i--)
6505 struct cgraph_node *node = topo->order[i];
6506 bool change = false, iterate = true;
6508 while (iterate)
6510 struct cgraph_node *v;
6511 iterate = false;
6512 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6513 if (v->has_gimple_body_p ()
6514 && ipcp_versionable_function_p (v))
6515 iterate |= decide_whether_version_node (v);
6517 change |= iterate;
6519 if (change)
6520 identify_dead_nodes (node);
6524 /* Look up all the bits information that we have discovered and copy it over
6525 to the transformation summary. */
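/* As an illustration of the encoding, a recorded value of 0x4 with a mask of
   0x3 would mean that the two lowest bits of the parameter are unknown while
   all the remaining bits are known to be those of 0x4; set bits in the mask
   mark unknown bit positions.  */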
6527 static void
6528 ipcp_store_bits_results (void)
6530 cgraph_node *node;
6532 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6534 ipa_node_params *info = ipa_node_params_sum->get (node);
6535 bool dumped_sth = false;
6536 bool found_useful_result = false;
6538 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6540 if (dump_file)
6541 fprintf (dump_file, "Not considering %s for ipa bitwise propagation"
6542 "; -fipa-bit-cp: disabled.\n",
6543 node->dump_name ());
6544 continue;
6547 if (info->ipcp_orig_node)
6548 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6549 if (!info->lattices)
6550 /* Newly expanded artificial thunks do not have lattices. */
6551 continue;
6553 unsigned count = ipa_get_param_count (info);
6554 for (unsigned i = 0; i < count; i++)
6556 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6557 if (plats->bits_lattice.constant_p ())
6559 found_useful_result = true;
6560 break;
6564 if (!found_useful_result)
6565 continue;
6567 ipcp_transformation_initialize ();
6568 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6569 vec_safe_reserve_exact (ts->bits, count);
6571 for (unsigned i = 0; i < count; i++)
6573 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6574 ipa_bits *jfbits;
6576 if (plats->bits_lattice.constant_p ())
6578 jfbits
6579 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6580 plats->bits_lattice.get_mask ());
6581 if (!dbg_cnt (ipa_cp_bits))
6582 jfbits = NULL;
6584 else
6585 jfbits = NULL;
6587 ts->bits->quick_push (jfbits);
6588 if (!dump_file || !jfbits)
6589 continue;
6590 if (!dumped_sth)
6592 fprintf (dump_file, "Propagated bits info for function %s:\n",
6593 node->dump_name ());
6594 dumped_sth = true;
6596 fprintf (dump_file, " param %i: value = ", i);
6597 print_hex (jfbits->value, dump_file);
6598 fprintf (dump_file, ", mask = ");
6599 print_hex (jfbits->mask, dump_file);
6600 fprintf (dump_file, "\n");
6605 /* Look up all VR information that we have discovered and copy it over
6606 to the transformation summary. */
6608 static void
6609 ipcp_store_vr_results (void)
6611 cgraph_node *node;
6613 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6615 ipa_node_params *info = ipa_node_params_sum->get (node);
6616 bool found_useful_result = false;
6618 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6620 if (dump_file)
6621 fprintf (dump_file, "Not considering %s for VR discovery "
6622 "and propagation; -fipa-vrp: disabled.\n",
6623 node->dump_name ());
6624 continue;
6627 if (info->ipcp_orig_node)
6628 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6629 if (!info->lattices)
6630 /* Newly expanded artificial thunks do not have lattices. */
6631 continue;
6633 unsigned count = ipa_get_param_count (info);
6634 for (unsigned i = 0; i < count; i++)
6636 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6637 if (!plats->m_value_range.bottom_p ()
6638 && !plats->m_value_range.top_p ())
6640 found_useful_result = true;
6641 break;
6644 if (!found_useful_result)
6645 continue;
6647 ipcp_transformation_initialize ();
6648 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6649 vec_safe_reserve_exact (ts->m_vr, count);
6651 for (unsigned i = 0; i < count; i++)
6653 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6655 if (!plats->m_value_range.bottom_p ()
6656 && !plats->m_value_range.top_p ()
6657 && dbg_cnt (ipa_cp_vr))
6659 ipa_vr vr (plats->m_value_range.m_vr);
6660 ts->m_vr->quick_push (vr);
6662 else
6664 ipa_vr vr;
6665 ts->m_vr->quick_push (vr);
6671 /* The IPCP driver. */
6673 static unsigned int
6674 ipcp_driver (void)
6676 class ipa_topo_info topo;
6678 if (edge_clone_summaries == NULL)
6679 edge_clone_summaries = new edge_clone_summary_t (symtab);
6681 ipa_check_create_node_params ();
6682 ipa_check_create_edge_args ();
6683 clone_num_suffixes = new hash_map<const char *, unsigned>;
6685 if (dump_file)
6687 fprintf (dump_file, "\nIPA structures before propagation:\n");
6688 if (dump_flags & TDF_DETAILS)
6689 ipa_print_all_params (dump_file);
6690 ipa_print_all_jump_functions (dump_file);
6693 /* Topological sort. */
6694 build_toporder_info (&topo);
6695 /* Do the interprocedural propagation. */
6696 ipcp_propagate_stage (&topo);
6697 /* Decide what constant propagation and cloning should be performed. */
6698 ipcp_decision_stage (&topo);
6699 /* Store results of bits propagation. */
6700 ipcp_store_bits_results ();
6701 /* Store results of value range propagation. */
6702 ipcp_store_vr_results ();
6704 /* Free all IPCP structures. */
6705 delete clone_num_suffixes;
6706 free_toporder_info (&topo);
6707 delete edge_clone_summaries;
6708 edge_clone_summaries = NULL;
6709 ipa_free_all_structures_after_ipa_cp ();
6710 if (dump_file)
6711 fprintf (dump_file, "\nIPA constant propagation end\n");
6712 return 0;
6715 /* Initialization and computation of IPCP data structures. This is the initial
6716 intraprocedural analysis of functions, which gathers information to be
6717 propagated later on. */
6719 static void
6720 ipcp_generate_summary (void)
6722 struct cgraph_node *node;
6724 if (dump_file)
6725 fprintf (dump_file, "\nIPA constant propagation start:\n");
6726 ipa_register_cgraph_hooks ();
6728 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6729 ipa_analyze_node (node);
6732 namespace {
6734 const pass_data pass_data_ipa_cp =
6736 IPA_PASS, /* type */
6737 "cp", /* name */
6738 OPTGROUP_NONE, /* optinfo_flags */
6739 TV_IPA_CONSTANT_PROP, /* tv_id */
6740 0, /* properties_required */
6741 0, /* properties_provided */
6742 0, /* properties_destroyed */
6743 0, /* todo_flags_start */
6744 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6747 class pass_ipa_cp : public ipa_opt_pass_d
6749 public:
6750 pass_ipa_cp (gcc::context *ctxt)
6751 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6752 ipcp_generate_summary, /* generate_summary */
6753 NULL, /* write_summary */
6754 NULL, /* read_summary */
6755 ipcp_write_transformation_summaries, /*
6756 write_optimization_summary */
6757 ipcp_read_transformation_summaries, /*
6758 read_optimization_summary */
6759 NULL, /* stmt_fixup */
6760 0, /* function_transform_todo_flags_start */
6761 ipcp_transform_function, /* function_transform */
6762 NULL) /* variable_transform */
6765 /* opt_pass methods: */
6766 bool gate (function *) final override
6768 /* FIXME: We should remove the optimize check after we ensure we never run
6769 IPA passes when not optimizing. */
6770 return (flag_ipa_cp && optimize) || in_lto_p;
6773 unsigned int execute (function *) final override { return ipcp_driver (); }
6775 }; // class pass_ipa_cp
6777 } // anon namespace
6779 ipa_opt_pass_d *
6780 make_pass_ipa_cp (gcc::context *ctxt)
6782 return new pass_ipa_cp (ctxt);
6785 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6786 within the same process. For use by toplev::finalize. */
6788 void
6789 ipa_cp_cc_finalize (void)
6791 base_count = profile_count::uninitialized ();
6792 overall_size = 0;
6793 orig_overall_size = 0;
6794 ipcp_free_transformation_sum ();
6797 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6798 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6799 DECL_UID-sorted vector if available (which is pre-computed only if there are
6800 many parameters). Can return -1 if PARAM is the static chain, which is
6801 not represented among DECL_ARGUMENTS. */
6803 int
6804 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6806 gcc_assert (TREE_CODE (param) == PARM_DECL);
6807 if (m_uid_to_idx)
6809 unsigned puid = DECL_UID (param);
6810 const ipa_uid_to_idx_map_elt *res
6811 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6812 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6814 return elt.uid < uid;
6816 if (res == m_uid_to_idx->end ()
6817 || res->uid != puid)
6819 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6820 return -1;
6822 return res->index;
6825 unsigned index = 0;
6826 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6827 if (p == param)
6828 return (int) index;
6830 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6831 return -1;
6834 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6835 according to the uid. */
6837 static int
6838 compare_uids (const void *a, const void *b)
6840 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6841 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6842 if (e1->uid < e2->uid)
6843 return -1;
6844 if (e1->uid > e2->uid)
6845 return 1;
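/* DECL_UIDs are unique, so two distinct elements can never compare
   equal.  */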
6846 gcc_unreachable ();
6849 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6850 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6851 from parameters to their indices in the DECL_ARGUMENTS chain. */
6853 void
6854 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6856 int c = count_formal_params (fndecl);
6857 if (c < 32)
6858 return;
6860 m_uid_to_idx = NULL;
6861 vec_safe_reserve (m_uid_to_idx, c, true);
6862 unsigned index = 0;
6863 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6865 ipa_uid_to_idx_map_elt elt;
6866 elt.uid = DECL_UID (p);
6867 elt.index = index;
6868 m_uid_to_idx->quick_push (elt);
6870 m_uid_to_idx->qsort (compare_uids);
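/* A plausible usage sketch (the variables are hypothetical, for illustration
   only): build the map once when the transformation summary of a node is set
   up and then query it repeatedly:

     ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
     ts->maybe_create_parm_idx_map (node->decl);
     int idx = ts->get_param_index (node->decl, parm);

   For functions with fewer than 32 parameters the map is not built and
   get_param_index falls back to a linear walk of DECL_ARGUMENTS.  */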