ada: Reorder components in Ada.Containers.Bounded_Doubly_Linked_Lists
[official-gcc.git] / gcc / ipa-cp.cc
blob0f37bb5e336041b67db4bfbc9fa6586314ebd090
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
34 is deemed good.
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 third stage.
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
101 the second stage. */
103 #define INCLUDE_ALGORITHM
104 #include "config.h"
105 #include "system.h"
106 #include "coretypes.h"
107 #include "backend.h"
108 #include "tree.h"
109 #include "gimple-expr.h"
110 #include "gimple.h"
111 #include "predict.h"
112 #include "alloc-pool.h"
113 #include "tree-pass.h"
114 #include "cgraph.h"
115 #include "diagnostic.h"
116 #include "fold-const.h"
117 #include "gimple-iterator.h"
118 #include "gimple-fold.h"
119 #include "symbol-summary.h"
120 #include "tree-vrp.h"
121 #include "ipa-prop.h"
122 #include "tree-pretty-print.h"
123 #include "tree-inline.h"
124 #include "ipa-fnsummary.h"
125 #include "ipa-utils.h"
126 #include "tree-ssa-ccp.h"
127 #include "stringpool.h"
128 #include "attribs.h"
129 #include "dbgcnt.h"
130 #include "symtab-clones.h"
131 #include "gimple-range.h"
133 template <typename valtype> class ipcp_value;
135 /* Describes a particular source for an IPA-CP value. */
137 template <typename valtype>
138 struct ipcp_value_source
140 public:
141 /* Aggregate offset of the source, negative if the source is scalar value of
142 the argument itself. */
143 HOST_WIDE_INT offset;
144 /* The incoming edge that brought the value. */
145 cgraph_edge *cs;
146 /* If the jump function that resulted into his value was a pass-through or an
147 ancestor, this is the ipcp_value of the caller from which the described
148 value has been derived. Otherwise it is NULL. */
149 ipcp_value<valtype> *val;
150 /* Next pointer in a linked list of sources of a value. */
151 ipcp_value_source *next;
152 /* If the jump function that resulted into his value was a pass-through or an
153 ancestor, this is the index of the parameter of the caller the jump
154 function references. */
155 int index;
158 /* Common ancestor for all ipcp_value instantiations. */
160 class ipcp_value_base
162 public:
163 /* Time benefit and that specializing the function for this value would bring
164 about in this function alone. */
165 sreal local_time_benefit;
166 /* Time benefit that specializing the function for this value can bring about
167 in it's callees. */
168 sreal prop_time_benefit;
169 /* Size cost that specializing the function for this value would bring about
170 in this function alone. */
171 int local_size_cost;
172 /* Size cost that specializing the function for this value can bring about in
173 it's callees. */
174 int prop_size_cost;
176 ipcp_value_base ()
177 : local_time_benefit (0), prop_time_benefit (0),
178 local_size_cost (0), prop_size_cost (0) {}
181 /* Describes one particular value stored in struct ipcp_lattice. */
183 template <typename valtype>
184 class ipcp_value : public ipcp_value_base
186 public:
187 /* The actual value for the given parameter. */
188 valtype value;
189 /* The list of sources from which this value originates. */
190 ipcp_value_source <valtype> *sources = nullptr;
191 /* Next pointers in a linked list of all values in a lattice. */
192 ipcp_value *next = nullptr;
193 /* Next pointers in a linked list of values in a strongly connected component
194 of values. */
195 ipcp_value *scc_next = nullptr;
196 /* Next pointers in a linked list of SCCs of values sorted topologically
197 according their sources. */
198 ipcp_value *topo_next = nullptr;
199 /* A specialized node created for this value, NULL if none has been (so far)
200 created. */
201 cgraph_node *spec_node = nullptr;
202 /* Depth first search number and low link for topological sorting of
203 values. */
204 int dfs = 0;
205 int low_link = 0;
206 /* SCC number to identify values which recursively feed into each other.
207 Values in the same SCC have the same SCC number. */
208 int scc_no = 0;
209 /* Non zero if the value is generated from another value in the same lattice
210 for a self-recursive call, the actual number is how many times the
211 operation has been performed. In the unlikely event of the value being
212 present in two chains fo self-recursive value generation chains, it is the
213 maximum. */
214 unsigned self_recursion_generated_level = 0;
215 /* True if this value is currently on the topo-sort stack. */
216 bool on_stack = false;
218 void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
219 HOST_WIDE_INT offset);
221 /* Return true if both THIS value and O feed into each other. */
223 bool same_scc (const ipcp_value<valtype> *o)
225 return o->scc_no == scc_no;
228 /* Return true, if a this value has been generated for a self-recursive call as
229 a result of an arithmetic pass-through jump-function acting on a value in
230 the same lattice function. */
232 bool self_recursion_generated_p ()
234 return self_recursion_generated_level > 0;
238 /* Lattice describing potential values of a formal parameter of a function, or
239 a part of an aggregate. TOP is represented by a lattice with zero values
240 and with contains_variable and bottom flags cleared. BOTTOM is represented
241 by a lattice with the bottom flag set. In that case, values and
242 contains_variable flag should be disregarded. */
244 template <typename valtype>
245 struct ipcp_lattice
247 public:
248 /* The list of known values and types in this lattice. Note that values are
249 not deallocated if a lattice is set to bottom because there may be value
250 sources referencing them. */
251 ipcp_value<valtype> *values;
252 /* Number of known values and types in this lattice. */
253 int values_count;
254 /* The lattice contains a variable component (in addition to values). */
255 bool contains_variable;
256 /* The value of the lattice is bottom (i.e. variable and unusable for any
257 propagation). */
258 bool bottom;
260 inline bool is_single_const ();
261 inline bool set_to_bottom ();
262 inline bool set_contains_variable ();
263 bool add_value (valtype newval, cgraph_edge *cs,
264 ipcp_value<valtype> *src_val = NULL,
265 int src_idx = 0, HOST_WIDE_INT offset = -1,
266 ipcp_value<valtype> **val_p = NULL,
267 unsigned same_lat_gen_level = 0);
268 void print (FILE * f, bool dump_sources, bool dump_benefits);
271 /* Lattice of tree values with an offset to describe a part of an
272 aggregate. */
274 struct ipcp_agg_lattice : public ipcp_lattice<tree>
276 public:
277 /* Offset that is being described by this lattice. */
278 HOST_WIDE_INT offset;
279 /* Size so that we don't have to re-compute it every time we traverse the
280 list. Must correspond to TYPE_SIZE of all lat values. */
281 HOST_WIDE_INT size;
282 /* Next element of the linked list. */
283 struct ipcp_agg_lattice *next;
286 /* Lattice of known bits, only capable of holding one value.
287 Bitwise constant propagation propagates which bits of a
288 value are constant.
289 For eg:
290 int f(int x)
292 return some_op (x);
295 int f1(int y)
297 if (cond)
298 return f (y & 0xff);
299 else
300 return f (y & 0xf);
303 In the above case, the param 'x' will always have all
304 the bits (except the bits in lsb) set to 0.
305 Hence the mask of 'x' would be 0xff. The mask
306 reflects that the bits in lsb are unknown.
307 The actual propagated value is given by m_value & ~m_mask. */
309 class ipcp_bits_lattice
311 public:
312 bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
313 bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
314 bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
315 bool set_to_bottom ();
316 bool set_to_constant (widest_int, widest_int);
317 bool known_nonzero_p () const;
319 widest_int get_value () const { return m_value; }
320 widest_int get_mask () const { return m_mask; }
322 bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
323 enum tree_code, tree, bool);
325 bool meet_with (widest_int, widest_int, unsigned);
327 void print (FILE *);
329 private:
330 enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
332 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
333 If a bit in mask is set to 0, then the corresponding bit in
334 value is known to be constant. */
335 widest_int m_value, m_mask;
337 bool meet_with_1 (widest_int, widest_int, unsigned, bool);
338 void get_value_and_mask (tree, widest_int *, widest_int *);
341 /* Lattice of value ranges. */
343 class ipcp_vr_lattice
345 public:
346 value_range m_vr;
348 inline bool bottom_p () const;
349 inline bool top_p () const;
350 inline bool set_to_bottom ();
351 bool meet_with (const value_range *p_vr);
352 bool meet_with (const ipcp_vr_lattice &other);
353 void init () { gcc_assert (m_vr.undefined_p ()); }
354 void print (FILE * f);
356 private:
357 bool meet_with_1 (const value_range *other_vr);
360 /* Structure containing lattices for a parameter itself and for pieces of
361 aggregates that are passed in the parameter or by a reference in a parameter
362 plus some other useful flags. */
364 class ipcp_param_lattices
366 public:
367 /* Lattice describing the value of the parameter itself. */
368 ipcp_lattice<tree> itself;
369 /* Lattice describing the polymorphic contexts of a parameter. */
370 ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
371 /* Lattices describing aggregate parts. */
372 ipcp_agg_lattice *aggs;
373 /* Lattice describing known bits. */
374 ipcp_bits_lattice bits_lattice;
375 /* Lattice describing value range. */
376 ipcp_vr_lattice m_value_range;
377 /* Number of aggregate lattices */
378 int aggs_count;
379 /* True if aggregate data were passed by reference (as opposed to by
380 value). */
381 bool aggs_by_ref;
382 /* All aggregate lattices contain a variable component (in addition to
383 values). */
384 bool aggs_contain_variable;
385 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
386 for any propagation). */
387 bool aggs_bottom;
389 /* There is a virtual call based on this parameter. */
390 bool virt_call;
393 /* Allocation pools for values and their sources in ipa-cp. */
395 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
396 ("IPA-CP constant values");
398 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
399 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
401 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
402 ("IPA-CP value sources");
404 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
405 ("IPA_CP aggregate lattices");
407 /* Base count to use in heuristics when using profile feedback. */
409 static profile_count base_count;
411 /* Original overall size of the program. */
413 static long overall_size, orig_overall_size;
415 /* Node name to unique clone suffix number map. */
416 static hash_map<const char *, unsigned> *clone_num_suffixes;
418 /* Return the param lattices structure corresponding to the Ith formal
419 parameter of the function described by INFO. */
420 static inline class ipcp_param_lattices *
421 ipa_get_parm_lattices (class ipa_node_params *info, int i)
423 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
424 gcc_checking_assert (!info->ipcp_orig_node);
425 gcc_checking_assert (info->lattices);
426 return &(info->lattices[i]);
429 /* Return the lattice corresponding to the scalar value of the Ith formal
430 parameter of the function described by INFO. */
431 static inline ipcp_lattice<tree> *
432 ipa_get_scalar_lat (class ipa_node_params *info, int i)
434 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
435 return &plats->itself;
438 /* Return the lattice corresponding to the scalar value of the Ith formal
439 parameter of the function described by INFO. */
440 static inline ipcp_lattice<ipa_polymorphic_call_context> *
441 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
443 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
444 return &plats->ctxlat;
447 /* Return whether LAT is a lattice with a single constant and without an
448 undefined value. */
450 template <typename valtype>
451 inline bool
452 ipcp_lattice<valtype>::is_single_const ()
454 if (bottom || contains_variable || values_count != 1)
455 return false;
456 else
457 return true;
460 /* Return true iff X and Y should be considered equal values by IPA-CP. */
462 static bool
463 values_equal_for_ipcp_p (tree x, tree y)
465 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
467 if (x == y)
468 return true;
470 if (TREE_CODE (x) == ADDR_EXPR
471 && TREE_CODE (y) == ADDR_EXPR
472 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
473 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
474 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
475 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
476 else
477 return operand_equal_p (x, y, 0);
480 /* Print V which is extracted from a value in a lattice to F. */
482 static void
483 print_ipcp_constant_value (FILE * f, tree v)
485 if (TREE_CODE (v) == ADDR_EXPR
486 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
488 fprintf (f, "& ");
489 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
491 else
492 print_generic_expr (f, v);
495 /* Print V which is extracted from a value in a lattice to F. */
497 static void
498 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
500 v.dump(f, false);
503 /* Print a lattice LAT to F. */
505 template <typename valtype>
506 void
507 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
509 ipcp_value<valtype> *val;
510 bool prev = false;
512 if (bottom)
514 fprintf (f, "BOTTOM\n");
515 return;
518 if (!values_count && !contains_variable)
520 fprintf (f, "TOP\n");
521 return;
524 if (contains_variable)
526 fprintf (f, "VARIABLE");
527 prev = true;
528 if (dump_benefits)
529 fprintf (f, "\n");
532 for (val = values; val; val = val->next)
534 if (dump_benefits && prev)
535 fprintf (f, " ");
536 else if (!dump_benefits && prev)
537 fprintf (f, ", ");
538 else
539 prev = true;
541 print_ipcp_constant_value (f, val->value);
543 if (dump_sources)
545 ipcp_value_source<valtype> *s;
547 if (val->self_recursion_generated_p ())
548 fprintf (f, " [self_gen(%i), from:",
549 val->self_recursion_generated_level);
550 else
551 fprintf (f, " [scc: %i, from:", val->scc_no);
552 for (s = val->sources; s; s = s->next)
553 fprintf (f, " %i(%f)", s->cs->caller->order,
554 s->cs->sreal_frequency ().to_double ());
555 fprintf (f, "]");
558 if (dump_benefits)
559 fprintf (f, " [loc_time: %g, loc_size: %i, "
560 "prop_time: %g, prop_size: %i]\n",
561 val->local_time_benefit.to_double (), val->local_size_cost,
562 val->prop_time_benefit.to_double (), val->prop_size_cost);
564 if (!dump_benefits)
565 fprintf (f, "\n");
568 void
569 ipcp_bits_lattice::print (FILE *f)
571 if (top_p ())
572 fprintf (f, " Bits unknown (TOP)\n");
573 else if (bottom_p ())
574 fprintf (f, " Bits unusable (BOTTOM)\n");
575 else
577 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
578 fprintf (f, ", mask = "); print_hex (get_mask (), f);
579 fprintf (f, "\n");
583 /* Print value range lattice to F. */
585 void
586 ipcp_vr_lattice::print (FILE * f)
588 dump_value_range (f, &m_vr);
591 /* Print all ipcp_lattices of all functions to F. */
593 static void
594 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
596 struct cgraph_node *node;
597 int i, count;
599 fprintf (f, "\nLattices:\n");
600 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
602 class ipa_node_params *info;
604 info = ipa_node_params_sum->get (node);
605 /* Skip unoptimized functions and constprop clones since we don't make
606 lattices for them. */
607 if (!info || info->ipcp_orig_node)
608 continue;
609 fprintf (f, " Node: %s:\n", node->dump_name ());
610 count = ipa_get_param_count (info);
611 for (i = 0; i < count; i++)
613 struct ipcp_agg_lattice *aglat;
614 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
615 fprintf (f, " param [%d]: ", i);
616 plats->itself.print (f, dump_sources, dump_benefits);
617 fprintf (f, " ctxs: ");
618 plats->ctxlat.print (f, dump_sources, dump_benefits);
619 plats->bits_lattice.print (f);
620 fprintf (f, " ");
621 plats->m_value_range.print (f);
622 fprintf (f, "\n");
623 if (plats->virt_call)
624 fprintf (f, " virt_call flag set\n");
626 if (plats->aggs_bottom)
628 fprintf (f, " AGGS BOTTOM\n");
629 continue;
631 if (plats->aggs_contain_variable)
632 fprintf (f, " AGGS VARIABLE\n");
633 for (aglat = plats->aggs; aglat; aglat = aglat->next)
635 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
636 plats->aggs_by_ref ? "ref " : "", aglat->offset);
637 aglat->print (f, dump_sources, dump_benefits);
643 /* Determine whether it is at all technically possible to create clones of NODE
644 and store this information in the ipa_node_params structure associated
645 with NODE. */
647 static void
648 determine_versionability (struct cgraph_node *node,
649 class ipa_node_params *info)
651 const char *reason = NULL;
653 /* There are a number of generic reasons functions cannot be versioned. We
654 also cannot remove parameters if there are type attributes such as fnspec
655 present. */
656 if (node->alias || node->thunk)
657 reason = "alias or thunk";
658 else if (!node->versionable)
659 reason = "not a tree_versionable_function";
660 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
661 reason = "insufficient body availability";
662 else if (!opt_for_fn (node->decl, optimize)
663 || !opt_for_fn (node->decl, flag_ipa_cp))
664 reason = "non-optimized function";
665 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
667 /* Ideally we should clone the SIMD clones themselves and create
668 vector copies of them, so IPA-cp and SIMD clones can happily
669 coexist, but that may not be worth the effort. */
670 reason = "function has SIMD clones";
672 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
674 /* Ideally we should clone the target clones themselves and create
675 copies of them, so IPA-cp and target clones can happily
676 coexist, but that may not be worth the effort. */
677 reason = "function target_clones attribute";
679 /* Don't clone decls local to a comdat group; it breaks and for C++
680 decloned constructors, inlining is always better anyway. */
681 else if (node->comdat_local_p ())
682 reason = "comdat-local function";
683 else if (node->calls_comdat_local)
685 /* TODO: call is versionable if we make sure that all
686 callers are inside of a comdat group. */
687 reason = "calls comdat-local function";
690 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
691 work only when inlined. Cloning them may still lead to better code
692 because ipa-cp will not give up on cloning further. If the function is
693 external this however leads to wrong code because we may end up producing
694 offline copy of the function. */
695 if (DECL_EXTERNAL (node->decl))
696 for (cgraph_edge *edge = node->callees; !reason && edge;
697 edge = edge->next_callee)
698 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
700 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
701 reason = "external function which calls va_arg_pack";
702 if (DECL_FUNCTION_CODE (edge->callee->decl)
703 == BUILT_IN_VA_ARG_PACK_LEN)
704 reason = "external function which calls va_arg_pack_len";
707 if (reason && dump_file && !node->alias && !node->thunk)
708 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
709 node->dump_name (), reason);
711 info->versionable = (reason == NULL);
714 /* Return true if it is at all technically possible to create clones of a
715 NODE. */
717 static bool
718 ipcp_versionable_function_p (struct cgraph_node *node)
720 ipa_node_params *info = ipa_node_params_sum->get (node);
721 return info && info->versionable;
724 /* Structure holding accumulated information about callers of a node. */
726 struct caller_statistics
728 /* If requested (see below), self-recursive call counts are summed into this
729 field. */
730 profile_count rec_count_sum;
731 /* The sum of all ipa counts of all the other (non-recursive) calls. */
732 profile_count count_sum;
733 /* Sum of all frequencies for all calls. */
734 sreal freq_sum;
735 /* Number of calls and hot calls respectively. */
736 int n_calls, n_hot_calls;
737 /* If itself is set up, also count the number of non-self-recursive
738 calls. */
739 int n_nonrec_calls;
740 /* If non-NULL, this is the node itself and calls from it should have their
741 counts included in rec_count_sum and not count_sum. */
742 cgraph_node *itself;
745 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
746 from IGNORED_CALLER are not counted. */
748 static inline void
749 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
751 stats->rec_count_sum = profile_count::zero ();
752 stats->count_sum = profile_count::zero ();
753 stats->n_calls = 0;
754 stats->n_hot_calls = 0;
755 stats->n_nonrec_calls = 0;
756 stats->freq_sum = 0;
757 stats->itself = itself;
760 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
761 non-thunk incoming edges to NODE. */
763 static bool
764 gather_caller_stats (struct cgraph_node *node, void *data)
766 struct caller_statistics *stats = (struct caller_statistics *) data;
767 struct cgraph_edge *cs;
769 for (cs = node->callers; cs; cs = cs->next_caller)
770 if (!cs->caller->thunk)
772 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
773 if (info && info->node_dead)
774 continue;
776 if (cs->count.ipa ().initialized_p ())
778 if (stats->itself && stats->itself == cs->caller)
779 stats->rec_count_sum += cs->count.ipa ();
780 else
781 stats->count_sum += cs->count.ipa ();
783 stats->freq_sum += cs->sreal_frequency ();
784 stats->n_calls++;
785 if (stats->itself && stats->itself != cs->caller)
786 stats->n_nonrec_calls++;
788 if (cs->maybe_hot_p ())
789 stats->n_hot_calls ++;
791 return false;
795 /* Return true if this NODE is viable candidate for cloning. */
797 static bool
798 ipcp_cloning_candidate_p (struct cgraph_node *node)
800 struct caller_statistics stats;
802 gcc_checking_assert (node->has_gimple_body_p ());
804 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
806 if (dump_file)
807 fprintf (dump_file, "Not considering %s for cloning; "
808 "-fipa-cp-clone disabled.\n",
809 node->dump_name ());
810 return false;
813 if (node->optimize_for_size_p ())
815 if (dump_file)
816 fprintf (dump_file, "Not considering %s for cloning; "
817 "optimizing it for size.\n",
818 node->dump_name ());
819 return false;
822 init_caller_stats (&stats);
823 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
825 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
827 if (dump_file)
828 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
829 node->dump_name ());
830 return true;
833 /* When profile is available and function is hot, propagate into it even if
834 calls seems cold; constant propagation can improve function's speed
835 significantly. */
836 if (stats.count_sum > profile_count::zero ()
837 && node->count.ipa ().initialized_p ())
839 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
841 if (dump_file)
842 fprintf (dump_file, "Considering %s for cloning; "
843 "usually called directly.\n",
844 node->dump_name ());
845 return true;
848 if (!stats.n_hot_calls)
850 if (dump_file)
851 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
852 node->dump_name ());
853 return false;
855 if (dump_file)
856 fprintf (dump_file, "Considering %s for cloning.\n",
857 node->dump_name ());
858 return true;
861 template <typename valtype>
862 class value_topo_info
864 public:
865 /* Head of the linked list of topologically sorted values. */
866 ipcp_value<valtype> *values_topo;
867 /* Stack for creating SCCs, represented by a linked list too. */
868 ipcp_value<valtype> *stack;
869 /* Counter driving the algorithm in add_val_to_toposort. */
870 int dfs_counter;
872 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
874 void add_val (ipcp_value<valtype> *cur_val);
875 void propagate_effects ();
878 /* Arrays representing a topological ordering of call graph nodes and a stack
879 of nodes used during constant propagation and also data required to perform
880 topological sort of values and propagation of benefits in the determined
881 order. */
883 class ipa_topo_info
885 public:
886 /* Array with obtained topological order of cgraph nodes. */
887 struct cgraph_node **order;
888 /* Stack of cgraph nodes used during propagation within SCC until all values
889 in the SCC stabilize. */
890 struct cgraph_node **stack;
891 int nnodes, stack_top;
893 value_topo_info<tree> constants;
894 value_topo_info<ipa_polymorphic_call_context> contexts;
896 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
897 constants ()
901 /* Skip edges from and to nodes without ipa_cp enabled.
902 Ignore not available symbols. */
904 static bool
905 ignore_edge_p (cgraph_edge *e)
907 enum availability avail;
908 cgraph_node *ultimate_target
909 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
911 return (avail <= AVAIL_INTERPOSABLE
912 || !opt_for_fn (ultimate_target->decl, optimize)
913 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
916 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
918 static void
919 build_toporder_info (class ipa_topo_info *topo)
921 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
922 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
924 gcc_checking_assert (topo->stack_top == 0);
925 topo->nnodes = ipa_reduced_postorder (topo->order, true,
926 ignore_edge_p);
929 /* Free information about strongly connected components and the arrays in
930 TOPO. */
932 static void
933 free_toporder_info (class ipa_topo_info *topo)
935 ipa_free_postorder_info ();
936 free (topo->order);
937 free (topo->stack);
940 /* Add NODE to the stack in TOPO, unless it is already there. */
942 static inline void
943 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
945 ipa_node_params *info = ipa_node_params_sum->get (node);
946 if (info->node_enqueued)
947 return;
948 info->node_enqueued = 1;
949 topo->stack[topo->stack_top++] = node;
952 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
953 is empty. */
955 static struct cgraph_node *
956 pop_node_from_stack (class ipa_topo_info *topo)
958 if (topo->stack_top)
960 struct cgraph_node *node;
961 topo->stack_top--;
962 node = topo->stack[topo->stack_top];
963 ipa_node_params_sum->get (node)->node_enqueued = 0;
964 return node;
966 else
967 return NULL;
970 /* Set lattice LAT to bottom and return true if it previously was not set as
971 such. */
973 template <typename valtype>
974 inline bool
975 ipcp_lattice<valtype>::set_to_bottom ()
977 bool ret = !bottom;
978 bottom = true;
979 return ret;
982 /* Mark lattice as containing an unknown value and return true if it previously
983 was not marked as such. */
985 template <typename valtype>
986 inline bool
987 ipcp_lattice<valtype>::set_contains_variable ()
989 bool ret = !contains_variable;
990 contains_variable = true;
991 return ret;
994 /* Set all aggregate lattices in PLATS to bottom and return true if they were
995 not previously set as such. */
997 static inline bool
998 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
1000 bool ret = !plats->aggs_bottom;
1001 plats->aggs_bottom = true;
1002 return ret;
1005 /* Mark all aggregate lattices in PLATS as containing an unknown value and
1006 return true if they were not previously marked as such. */
1008 static inline bool
1009 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
1011 bool ret = !plats->aggs_contain_variable;
1012 plats->aggs_contain_variable = true;
1013 return ret;
1016 bool
1017 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
1019 return meet_with_1 (&other.m_vr);
1022 /* Meet the current value of the lattice with value range described by VR
1023 lattice. */
1025 bool
1026 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1028 return meet_with_1 (p_vr);
1031 /* Meet the current value of the lattice with value range described by
1032 OTHER_VR lattice. Return TRUE if anything changed. */
1034 bool
1035 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1037 if (bottom_p ())
1038 return false;
1040 if (other_vr->varying_p ())
1041 return set_to_bottom ();
1043 bool res;
1044 if (flag_checking)
1046 value_range save (m_vr);
1047 res = m_vr.union_ (*other_vr);
1048 gcc_assert (res == (m_vr != save));
1050 else
1051 res = m_vr.union_ (*other_vr);
1052 return res;
1055 /* Return true if value range information in the lattice is yet unknown. */
1057 bool
1058 ipcp_vr_lattice::top_p () const
1060 return m_vr.undefined_p ();
1063 /* Return true if value range information in the lattice is known to be
1064 unusable. */
1066 bool
1067 ipcp_vr_lattice::bottom_p () const
1069 return m_vr.varying_p ();
1072 /* Set value range information in the lattice to bottom. Return true if it
1073 previously was in a different state. */
1075 bool
1076 ipcp_vr_lattice::set_to_bottom ()
1078 if (m_vr.varying_p ())
1079 return false;
1080 /* ?? We create all sorts of VARYING ranges for floats, structures,
1081 and other types which we cannot handle as ranges. We should
1082 probably avoid handling them throughout the pass, but it's easier
1083 to create a sensible VARYING here and let the lattice
1084 propagate. */
1085 m_vr.set_varying (integer_type_node);
1086 return true;
1089 /* Set lattice value to bottom, if it already isn't the case. */
1091 bool
1092 ipcp_bits_lattice::set_to_bottom ()
1094 if (bottom_p ())
1095 return false;
1096 m_lattice_val = IPA_BITS_VARYING;
1097 m_value = 0;
1098 m_mask = -1;
1099 return true;
1102 /* Set to constant if it isn't already. Only meant to be called
1103 when switching state from TOP. */
1105 bool
1106 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1108 gcc_assert (top_p ());
1109 m_lattice_val = IPA_BITS_CONSTANT;
1110 m_value = wi::bit_and (wi::bit_not (mask), value);
1111 m_mask = mask;
1112 return true;
1115 /* Return true if any of the known bits are non-zero. */
1117 bool
1118 ipcp_bits_lattice::known_nonzero_p () const
1120 if (!constant_p ())
1121 return false;
1122 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
1125 /* Convert operand to value, mask form. */
1127 void
1128 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1130 wide_int get_nonzero_bits (const_tree);
1132 if (TREE_CODE (operand) == INTEGER_CST)
1134 *valuep = wi::to_widest (operand);
1135 *maskp = 0;
1137 else
1139 *valuep = 0;
1140 *maskp = -1;
1144 /* Meet operation, similar to ccp_lattice_meet, we xor values
1145 if this->value, value have different values at same bit positions, we want
1146 to drop that bit to varying. Return true if mask is changed.
1147 This function assumes that the lattice value is in CONSTANT state. If
1148 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1150 bool
1151 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1152 unsigned precision, bool drop_all_ones)
1154 gcc_assert (constant_p ());
1156 widest_int old_mask = m_mask;
1157 m_mask = (m_mask | mask) | (m_value ^ value);
1158 if (drop_all_ones)
1159 m_mask |= m_value;
1160 m_value &= ~m_mask;
1162 if (wi::sext (m_mask, precision) == -1)
1163 return set_to_bottom ();
1165 return m_mask != old_mask;
1168 /* Meet the bits lattice with operand
1169 described by <value, mask, sgn, precision. */
1171 bool
1172 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1173 unsigned precision)
1175 if (bottom_p ())
1176 return false;
1178 if (top_p ())
1180 if (wi::sext (mask, precision) == -1)
1181 return set_to_bottom ();
1182 return set_to_constant (value, mask);
1185 return meet_with_1 (value, mask, precision, false);
1188 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1189 if code is binary operation or bit_value_unop (other) if code is unary op.
1190 In the case when code is nop_expr, no adjustment is required. If
1191 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1193 bool
1194 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1195 signop sgn, enum tree_code code, tree operand,
1196 bool drop_all_ones)
1198 if (other.bottom_p ())
1199 return set_to_bottom ();
1201 if (bottom_p () || other.top_p ())
1202 return false;
1204 widest_int adjusted_value, adjusted_mask;
1206 if (TREE_CODE_CLASS (code) == tcc_binary)
1208 tree type = TREE_TYPE (operand);
1209 widest_int o_value, o_mask;
1210 get_value_and_mask (operand, &o_value, &o_mask);
1212 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1213 sgn, precision, other.get_value (), other.get_mask (),
1214 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1216 if (wi::sext (adjusted_mask, precision) == -1)
1217 return set_to_bottom ();
1220 else if (TREE_CODE_CLASS (code) == tcc_unary)
1222 bit_value_unop (code, sgn, precision, &adjusted_value,
1223 &adjusted_mask, sgn, precision, other.get_value (),
1224 other.get_mask ());
1226 if (wi::sext (adjusted_mask, precision) == -1)
1227 return set_to_bottom ();
1230 else
1231 return set_to_bottom ();
1233 if (top_p ())
1235 if (drop_all_ones)
1237 adjusted_mask |= adjusted_value;
1238 adjusted_value &= ~adjusted_mask;
1240 if (wi::sext (adjusted_mask, precision) == -1)
1241 return set_to_bottom ();
1242 return set_to_constant (adjusted_value, adjusted_mask);
1244 else
1245 return meet_with_1 (adjusted_value, adjusted_mask, precision,
1246 drop_all_ones);
1249 /* Dump the contents of the list to FILE. */
1251 void
1252 ipa_argagg_value_list::dump (FILE *f)
1254 bool comma = false;
1255 for (const ipa_argagg_value &av : m_elts)
1257 fprintf (f, "%s %i[%u]=", comma ? "," : "",
1258 av.index, av.unit_offset);
1259 print_generic_expr (f, av.value);
1260 if (av.by_ref)
1261 fprintf (f, "(by_ref)");
1262 comma = true;
1264 fprintf (f, "\n");
1267 /* Dump the contents of the list to stderr. */
1269 void
1270 ipa_argagg_value_list::debug ()
1272 dump (stderr);
1275 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1276 NULL if there is no such constant. */
1278 const ipa_argagg_value *
1279 ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1281 ipa_argagg_value key;
1282 key.index = index;
1283 key.unit_offset = unit_offset;
1284 const ipa_argagg_value *res
1285 = std::lower_bound (m_elts.begin (), m_elts.end (), key,
1286 [] (const ipa_argagg_value &elt,
1287 const ipa_argagg_value &val)
1289 if (elt.index < val.index)
1290 return true;
1291 if (elt.index > val.index)
1292 return false;
1293 if (elt.unit_offset < val.unit_offset)
1294 return true;
1295 return false;
1298 if (res == m_elts.end ()
1299 || res->index != index
1300 || res->unit_offset != unit_offset)
1301 res = nullptr;
1303 /* TODO: perhaps remove the check (that the underlying array is indeed
1304 sorted) if it turns out it can be too slow? */
1305 if (!flag_checking)
1306 return res;
1308 const ipa_argagg_value *slow_res = NULL;
1309 int prev_index = -1;
1310 unsigned prev_unit_offset = 0;
1311 for (const ipa_argagg_value &av : m_elts)
1313 gcc_assert (prev_index < 0
1314 || prev_index < av.index
1315 || prev_unit_offset < av.unit_offset);
1316 prev_index = av.index;
1317 prev_unit_offset = av.unit_offset;
1318 if (av.index == index
1319 && av.unit_offset == unit_offset)
1320 slow_res = &av;
1322 gcc_assert (res == slow_res);
1324 return res;
1327 /* Return the first item describing a constant stored for parameter with INDEX,
1328 regardless of offset or reference, or NULL if there is no such constant. */
1330 const ipa_argagg_value *
1331 ipa_argagg_value_list::get_elt_for_index (int index) const
1333 const ipa_argagg_value *res
1334 = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1335 [] (const ipa_argagg_value &elt, unsigned idx)
1337 return elt.index < idx;
1339 if (res == m_elts.end ()
1340 || res->index != index)
1341 res = nullptr;
1342 return res;
1345 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1346 performing any check of whether value is passed by reference, or NULL_TREE
1347 if there is no such constant. */
1349 tree
1350 ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1352 const ipa_argagg_value *av = get_elt (index, unit_offset);
1353 return av ? av->value : NULL_TREE;
1356 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1357 passed by reference or not according to BY_REF, or NULL_TREE if there is
1358 no such constant. */
1360 tree
1361 ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1362 bool by_ref) const
1364 const ipa_argagg_value *av = get_elt (index, unit_offset);
1365 if (av && av->by_ref == by_ref)
1366 return av->value;
1367 return NULL_TREE;
1370 /* Return true if all elements present in OTHER are also present in this
1371 list. */
1373 bool
1374 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1376 unsigned j = 0;
1377 for (unsigned i = 0; i < other.m_elts.size (); i++)
1379 unsigned other_index = other.m_elts[i].index;
1380 unsigned other_offset = other.m_elts[i].unit_offset;
1382 while (j < m_elts.size ()
1383 && (m_elts[j].index < other_index
1384 || (m_elts[j].index == other_index
1385 && m_elts[j].unit_offset < other_offset)))
1386 j++;
1388 if (j >= m_elts.size ()
1389 || m_elts[j].index != other_index
1390 || m_elts[j].unit_offset != other_offset
1391 || m_elts[j].by_ref != other.m_elts[i].by_ref
1392 || !m_elts[j].value
1393 || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
1394 return false;
1396 return true;
1399 /* Push all items in this list that describe parameter SRC_INDEX into RES as
1400 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1401 offsets but skip those which would end up with a negative offset. */
1403 void
1404 ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1405 unsigned dest_index,
1406 unsigned unit_delta,
1407 vec<ipa_argagg_value> *res) const
1409 const ipa_argagg_value *av = get_elt_for_index (src_index);
1410 if (!av)
1411 return;
1412 unsigned prev_unit_offset = 0;
1413 bool first = true;
1414 for (; av < m_elts.end (); ++av)
1416 if (av->index > src_index)
1417 return;
1418 if (av->index == src_index
1419 && (av->unit_offset >= unit_delta)
1420 && av->value)
1422 ipa_argagg_value new_av;
1423 gcc_checking_assert (av->value);
1424 new_av.value = av->value;
1425 new_av.unit_offset = av->unit_offset - unit_delta;
1426 new_av.index = dest_index;
1427 new_av.by_ref = av->by_ref;
1429 /* Quick check that the offsets we push are indeed increasing. */
1430 gcc_assert (first
1431 || new_av.unit_offset > prev_unit_offset);
1432 prev_unit_offset = new_av.unit_offset;
1433 first = false;
1435 res->safe_push (new_av);
1440 /* Push to RES information about single lattices describing aggregate values in
1441 PLATS as those describing parameter DEST_INDEX and the original offset minus
1442 UNIT_DELTA. Return true if any item has been pushed to RES. */
1444 static bool
1445 push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1446 unsigned unit_delta,
1447 vec<ipa_argagg_value> *res)
1449 if (plats->aggs_contain_variable)
1450 return false;
1452 bool pushed_sth = false;
1453 bool first = true;
1454 unsigned prev_unit_offset = 0;
1455 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1456 if (aglat->is_single_const ()
1457 && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1459 ipa_argagg_value iav;
1460 iav.value = aglat->values->value;
1461 iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1462 iav.index = dest_index;
1463 iav.by_ref = plats->aggs_by_ref;
1465 gcc_assert (first
1466 || iav.unit_offset > prev_unit_offset);
1467 prev_unit_offset = iav.unit_offset;
1468 first = false;
1470 pushed_sth = true;
1471 res->safe_push (iav);
1473 return pushed_sth;
1476 /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1477 Return the number of remaining valid entries. */
1479 static unsigned
1480 intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1481 const vec<ipa_argagg_value> &other)
1483 unsigned valid_entries = 0;
1484 unsigned j = 0;
1485 for (unsigned i = 0; i < elts.length (); i++)
1487 if (!elts[i].value)
1488 continue;
1490 unsigned this_index = elts[i].index;
1491 unsigned this_offset = elts[i].unit_offset;
1493 while (j < other.length ()
1494 && (other[j].index < this_index
1495 || (other[j].index == this_index
1496 && other[j].unit_offset < this_offset)))
1497 j++;
1499 if (j >= other.length ())
1501 elts[i].value = NULL_TREE;
1502 continue;
1505 if (other[j].index == this_index
1506 && other[j].unit_offset == this_offset
1507 && other[j].by_ref == elts[i].by_ref
1508 && other[j].value
1509 && values_equal_for_ipcp_p (other[j].value, elts[i].value))
1510 valid_entries++;
1511 else
1512 elts[i].value = NULL_TREE;
1514 return valid_entries;
1517 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1518 return true is any of them has not been marked as such so far. */
1520 static inline bool
1521 set_all_contains_variable (class ipcp_param_lattices *plats)
1523 bool ret;
1524 ret = plats->itself.set_contains_variable ();
1525 ret |= plats->ctxlat.set_contains_variable ();
1526 ret |= set_agg_lats_contain_variable (plats);
1527 ret |= plats->bits_lattice.set_to_bottom ();
1528 ret |= plats->m_value_range.set_to_bottom ();
1529 return ret;
1532 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1533 points to by the number of callers to NODE. */
1535 static bool
1536 count_callers (cgraph_node *node, void *data)
1538 int *caller_count = (int *) data;
1540 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1541 /* Local thunks can be handled transparently, but if the thunk cannot
1542 be optimized out, count it as a real use. */
1543 if (!cs->caller->thunk || !cs->caller->local)
1544 ++*caller_count;
1545 return false;
1548 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1549 the one caller of some other node. Set the caller's corresponding flag. */
1551 static bool
1552 set_single_call_flag (cgraph_node *node, void *)
1554 cgraph_edge *cs = node->callers;
1555 /* Local thunks can be handled transparently, skip them. */
1556 while (cs && cs->caller->thunk && cs->caller->local)
1557 cs = cs->next_caller;
1558 if (cs)
1559 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1561 info->node_calling_single_call = true;
1562 return true;
1564 return false;
1567 /* Initialize ipcp_lattices. */
1569 static void
1570 initialize_node_lattices (struct cgraph_node *node)
1572 ipa_node_params *info = ipa_node_params_sum->get (node);
1573 struct cgraph_edge *ie;
1574 bool disable = false, variable = false;
1575 int i;
1577 gcc_checking_assert (node->has_gimple_body_p ());
1579 if (!ipa_get_param_count (info))
1580 disable = true;
1581 else if (node->local)
1583 int caller_count = 0;
1584 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1585 true);
1586 gcc_checking_assert (caller_count > 0);
1587 if (caller_count == 1)
1588 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1589 NULL, true);
1591 else
1593 /* When cloning is allowed, we can assume that externally visible
1594 functions are not called. We will compensate this by cloning
1595 later. */
1596 if (ipcp_versionable_function_p (node)
1597 && ipcp_cloning_candidate_p (node))
1598 variable = true;
1599 else
1600 disable = true;
1603 if (dump_file && (dump_flags & TDF_DETAILS)
1604 && !node->alias && !node->thunk)
1606 fprintf (dump_file, "Initializing lattices of %s\n",
1607 node->dump_name ());
1608 if (disable || variable)
1609 fprintf (dump_file, " Marking all lattices as %s\n",
1610 disable ? "BOTTOM" : "VARIABLE");
1613 auto_vec<bool, 16> surviving_params;
1614 bool pre_modified = false;
1616 clone_info *cinfo = clone_info::get (node);
1618 if (!disable && cinfo && cinfo->param_adjustments)
1620 /* At the moment all IPA optimizations should use the number of
1621 parameters of the prevailing decl as the m_always_copy_start.
1622 Handling any other value would complicate the code below, so for the
1623 time bing let's only assert it is so. */
1624 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1625 == ipa_get_param_count (info))
1626 || cinfo->param_adjustments->m_always_copy_start < 0);
1628 pre_modified = true;
1629 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1631 if (dump_file && (dump_flags & TDF_DETAILS)
1632 && !node->alias && !node->thunk)
1634 bool first = true;
1635 for (int j = 0; j < ipa_get_param_count (info); j++)
1637 if (j < (int) surviving_params.length ()
1638 && surviving_params[j])
1639 continue;
1640 if (first)
1642 fprintf (dump_file,
1643 " The following parameters are dead on arrival:");
1644 first = false;
1646 fprintf (dump_file, " %u", j);
1648 if (!first)
1649 fprintf (dump_file, "\n");
1653 for (i = 0; i < ipa_get_param_count (info); i++)
1655 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1656 if (disable
1657 || !ipa_get_type (info, i)
1658 || (pre_modified && (surviving_params.length () <= (unsigned) i
1659 || !surviving_params[i])))
1661 plats->itself.set_to_bottom ();
1662 plats->ctxlat.set_to_bottom ();
1663 set_agg_lats_to_bottom (plats);
1664 plats->bits_lattice.set_to_bottom ();
1665 plats->m_value_range.m_vr = value_range ();
1666 plats->m_value_range.set_to_bottom ();
1668 else
1670 plats->m_value_range.init ();
1671 if (variable)
1672 set_all_contains_variable (plats);
1676 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1677 if (ie->indirect_info->polymorphic
1678 && ie->indirect_info->param_index >= 0)
1680 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1681 ipa_get_parm_lattices (info,
1682 ie->indirect_info->param_index)->virt_call = 1;
1686 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1687 PARAM_TYPE. */
1689 static bool
1690 ipacp_value_safe_for_type (tree param_type, tree value)
1692 tree val_type = TREE_TYPE (value);
1693 if (param_type == val_type
1694 || useless_type_conversion_p (param_type, val_type)
1695 || fold_convertible_p (param_type, value))
1696 return true;
1697 else
1698 return false;
1701 /* Return the result of a (possibly arithmetic) operation on the constant
1702 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1703 the type of the parameter to which the result is passed. Return
1704 NULL_TREE if that cannot be determined or be considered an
1705 interprocedural invariant. */
1707 static tree
1708 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1709 tree res_type)
1711 tree res;
1713 if (opcode == NOP_EXPR)
1714 return input;
1715 if (!is_gimple_ip_invariant (input))
1716 return NULL_TREE;
1718 if (opcode == ASSERT_EXPR)
1720 if (values_equal_for_ipcp_p (input, operand))
1721 return input;
1722 else
1723 return NULL_TREE;
1726 if (!res_type)
1728 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1729 res_type = boolean_type_node;
1730 else if (expr_type_first_operand_type_p (opcode))
1731 res_type = TREE_TYPE (input);
1732 else
1733 return NULL_TREE;
1736 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1737 res = fold_unary (opcode, res_type, input);
1738 else
1739 res = fold_binary (opcode, res_type, input, operand);
1741 if (res && !is_gimple_ip_invariant (res))
1742 return NULL_TREE;
1744 return res;
1747 /* Return the result of a (possibly arithmetic) pass through jump function
1748 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1749 to which the result is passed. Return NULL_TREE if that cannot be
1750 determined or be considered an interprocedural invariant. */
1752 static tree
1753 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1754 tree res_type)
1756 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1757 input,
1758 ipa_get_jf_pass_through_operand (jfunc),
1759 res_type);
1762 /* Return the result of an ancestor jump function JFUNC on the constant value
1763 INPUT. Return NULL_TREE if that cannot be determined. */
1765 static tree
1766 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1768 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1769 if (TREE_CODE (input) == ADDR_EXPR)
1771 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1772 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1773 if (known_eq (off, 0))
1774 return input;
1775 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1776 return build1 (ADDR_EXPR, TREE_TYPE (input),
1777 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1778 build_int_cst (ptr_type_node, byte_offset)));
1780 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1781 && zerop (input))
1782 return input;
1783 else
1784 return NULL_TREE;
1787 /* Determine whether JFUNC evaluates to a single known constant value and if
1788 so, return it. Otherwise return NULL. INFO describes the caller node or
1789 the one it is inlined to, so that pass-through jump functions can be
1790 evaluated. PARM_TYPE is the type of the parameter to which the result is
1791 passed. */
1793 tree
1794 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1795 tree parm_type)
1797 if (jfunc->type == IPA_JF_CONST)
1798 return ipa_get_jf_constant (jfunc);
1799 else if (jfunc->type == IPA_JF_PASS_THROUGH
1800 || jfunc->type == IPA_JF_ANCESTOR)
1802 tree input;
1803 int idx;
1805 if (jfunc->type == IPA_JF_PASS_THROUGH)
1806 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1807 else
1808 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1810 if (info->ipcp_orig_node)
1811 input = info->known_csts[idx];
1812 else
1814 ipcp_lattice<tree> *lat;
1816 if (!info->lattices
1817 || idx >= ipa_get_param_count (info))
1818 return NULL_TREE;
1819 lat = ipa_get_scalar_lat (info, idx);
1820 if (!lat->is_single_const ())
1821 return NULL_TREE;
1822 input = lat->values->value;
1825 if (!input)
1826 return NULL_TREE;
1828 if (jfunc->type == IPA_JF_PASS_THROUGH)
1829 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1830 else
1831 return ipa_get_jf_ancestor_result (jfunc, input);
1833 else
1834 return NULL_TREE;
1837 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1838 that INFO describes the caller node or the one it is inlined to, CS is the
1839 call graph edge corresponding to JFUNC and CSIDX index of the described
1840 parameter. */
1842 ipa_polymorphic_call_context
1843 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1844 ipa_jump_func *jfunc)
1846 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1847 ipa_polymorphic_call_context ctx;
1848 ipa_polymorphic_call_context *edge_ctx
1849 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1851 if (edge_ctx && !edge_ctx->useless_p ())
1852 ctx = *edge_ctx;
1854 if (jfunc->type == IPA_JF_PASS_THROUGH
1855 || jfunc->type == IPA_JF_ANCESTOR)
1857 ipa_polymorphic_call_context srcctx;
1858 int srcidx;
1859 bool type_preserved = true;
1860 if (jfunc->type == IPA_JF_PASS_THROUGH)
1862 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1863 return ctx;
1864 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1865 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1867 else
1869 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1870 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1872 if (info->ipcp_orig_node)
1874 if (info->known_contexts.exists ())
1875 srcctx = info->known_contexts[srcidx];
1877 else
1879 if (!info->lattices
1880 || srcidx >= ipa_get_param_count (info))
1881 return ctx;
1882 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1883 lat = ipa_get_poly_ctx_lat (info, srcidx);
1884 if (!lat->is_single_const ())
1885 return ctx;
1886 srcctx = lat->values->value;
1888 if (srcctx.useless_p ())
1889 return ctx;
1890 if (jfunc->type == IPA_JF_ANCESTOR)
1891 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1892 if (!type_preserved)
1893 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1894 srcctx.combine_with (ctx);
1895 return srcctx;
1898 return ctx;
1901 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1902 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1903 the result is a range or an anti-range. */
1905 static bool
1906 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1907 value_range *src_vr,
1908 enum tree_code operation,
1909 tree dst_type, tree src_type)
1911 if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
1912 return false;
1914 range_op_handler handler (operation, dst_type);
1915 return (handler
1916 && handler.fold_range (*dst_vr, dst_type,
1917 *src_vr, value_range (dst_type))
1918 && !dst_vr->varying_p ()
1919 && !dst_vr->undefined_p ());
1922 /* Determine range of JFUNC given that INFO describes the caller node or
1923 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1924 and PARM_TYPE of the parameter. */
1926 value_range
1927 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1928 ipa_jump_func *jfunc, tree parm_type)
1930 value_range vr;
1931 if (jfunc->m_vr)
1932 ipa_vr_operation_and_type_effects (&vr,
1933 jfunc->m_vr,
1934 NOP_EXPR, parm_type,
1935 jfunc->m_vr->type ());
1936 if (vr.singleton_p ())
1937 return vr;
1938 if (jfunc->type == IPA_JF_PASS_THROUGH)
1940 int idx;
1941 ipcp_transformation *sum
1942 = ipcp_get_transformation_summary (cs->caller->inlined_to
1943 ? cs->caller->inlined_to
1944 : cs->caller);
1945 if (!sum || !sum->m_vr)
1946 return vr;
1948 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1950 if (!(*sum->m_vr)[idx].known_p ())
1951 return vr;
1952 tree vr_type = ipa_get_type (info, idx);
1953 value_range srcvr;
1954 (*sum->m_vr)[idx].get_vrange (srcvr);
1956 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1958 if (TREE_CODE_CLASS (operation) == tcc_unary)
1960 value_range res;
1962 if (ipa_vr_operation_and_type_effects (&res,
1963 &srcvr,
1964 operation, parm_type,
1965 vr_type))
1966 vr.intersect (res);
1968 else
1970 value_range op_res, res;
1971 tree op = ipa_get_jf_pass_through_operand (jfunc);
1972 value_range op_vr;
1973 range_op_handler handler (operation, vr_type);
1975 ipa_range_set_and_normalize (op_vr, op);
1977 if (!handler
1978 || !op_res.supports_type_p (vr_type)
1979 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
1980 op_res.set_varying (vr_type);
1982 if (ipa_vr_operation_and_type_effects (&res,
1983 &op_res,
1984 NOP_EXPR, parm_type,
1985 vr_type))
1986 vr.intersect (res);
1989 return vr;
1992 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1993 single known constant value and if so, return it. Otherwise return NULL.
1994 NODE and INFO describes the caller node or the one it is inlined to, and
1995 its related info. */
1997 tree
1998 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1999 const ipa_agg_jf_item *item)
2001 tree value = NULL_TREE;
2002 int src_idx;
2004 if (item->offset < 0
2005 || item->jftype == IPA_JF_UNKNOWN
2006 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
2007 return NULL_TREE;
2009 if (item->jftype == IPA_JF_CONST)
2010 return item->value.constant;
2012 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2013 || item->jftype == IPA_JF_LOAD_AGG);
2015 src_idx = item->value.pass_through.formal_id;
2017 if (info->ipcp_orig_node)
2019 if (item->jftype == IPA_JF_PASS_THROUGH)
2020 value = info->known_csts[src_idx];
2021 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2023 ipa_argagg_value_list avl (ts);
2024 value = avl.get_value (src_idx,
2025 item->value.load_agg.offset / BITS_PER_UNIT,
2026 item->value.load_agg.by_ref);
2029 else if (info->lattices)
2031 class ipcp_param_lattices *src_plats
2032 = ipa_get_parm_lattices (info, src_idx);
2034 if (item->jftype == IPA_JF_PASS_THROUGH)
2036 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2038 if (!lat->is_single_const ())
2039 return NULL_TREE;
2041 value = lat->values->value;
2043 else if (src_plats->aggs
2044 && !src_plats->aggs_bottom
2045 && !src_plats->aggs_contain_variable
2046 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2048 struct ipcp_agg_lattice *aglat;
2050 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2052 if (aglat->offset > item->value.load_agg.offset)
2053 break;
2055 if (aglat->offset == item->value.load_agg.offset)
2057 if (aglat->is_single_const ())
2058 value = aglat->values->value;
2059 break;
2065 if (!value)
2066 return NULL_TREE;
2068 if (item->jftype == IPA_JF_LOAD_AGG)
2070 tree load_type = item->value.load_agg.type;
2071 tree value_type = TREE_TYPE (value);
2073 /* Ensure value type is compatible with load type. */
2074 if (!useless_type_conversion_p (load_type, value_type))
2075 return NULL_TREE;
2078 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2079 value,
2080 item->value.pass_through.operand,
2081 item->type);
2084 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2085 caller is inlined to) NODE which described by INFO and push the results to
2086 RES as describing values passed in parameter DST_INDEX. */
2088 void
2089 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2090 ipa_agg_jump_function *agg_jfunc,
2091 unsigned dst_index,
2092 vec<ipa_argagg_value> *res)
2094 unsigned prev_unit_offset = 0;
2095 bool first = true;
2097 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2099 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2100 if (!value)
2101 continue;
2103 ipa_argagg_value iav;
2104 iav.value = value;
2105 iav.unit_offset = item.offset / BITS_PER_UNIT;
2106 iav.index = dst_index;
2107 iav.by_ref = agg_jfunc->by_ref;
2109 gcc_assert (first
2110 || iav.unit_offset > prev_unit_offset);
2111 prev_unit_offset = iav.unit_offset;
2112 first = false;
2114 res->safe_push (iav);
2118 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
2119 bottom, not containing a variable component and without any known value at
2120 the same time. */
2122 DEBUG_FUNCTION void
2123 ipcp_verify_propagated_values (void)
2125 struct cgraph_node *node;
2127 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2129 ipa_node_params *info = ipa_node_params_sum->get (node);
2130 if (!opt_for_fn (node->decl, flag_ipa_cp)
2131 || !opt_for_fn (node->decl, optimize))
2132 continue;
2133 int i, count = ipa_get_param_count (info);
2135 for (i = 0; i < count; i++)
2137 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2139 if (!lat->bottom
2140 && !lat->contains_variable
2141 && lat->values_count == 0)
2143 if (dump_file)
2145 symtab->dump (dump_file);
2146 fprintf (dump_file, "\nIPA lattices after constant "
2147 "propagation, before gcc_unreachable:\n");
2148 print_all_lattices (dump_file, true, false);
2151 gcc_unreachable ();
2157 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2159 static bool
2160 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2161 ipa_polymorphic_call_context y)
2163 return x.equal_to (y);
2167 /* Add a new value source to the value represented by THIS, marking that a
2168 value comes from edge CS and (if the underlying jump function is a
2169 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2170 parameter described by SRC_INDEX. OFFSET is negative if the source was the
2171 scalar value of the parameter itself or the offset within an aggregate. */
2173 template <typename valtype>
2174 void
2175 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2176 int src_idx, HOST_WIDE_INT offset)
2178 ipcp_value_source<valtype> *src;
2180 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2181 src->offset = offset;
2182 src->cs = cs;
2183 src->val = src_val;
2184 src->index = src_idx;
2186 src->next = sources;
2187 sources = src;
2190 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2191 SOURCE and clear all other fields. */
2193 static ipcp_value<tree> *
2194 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2196 ipcp_value<tree> *val;
2198 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2199 val->value = cst;
2200 val->self_recursion_generated_level = same_lat_gen_level;
2201 return val;
2204 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2205 value to SOURCE and clear all other fields. */
2207 static ipcp_value<ipa_polymorphic_call_context> *
2208 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2209 unsigned same_lat_gen_level)
2211 ipcp_value<ipa_polymorphic_call_context> *val;
2213 val = new (ipcp_poly_ctx_values_pool.allocate ())
2214 ipcp_value<ipa_polymorphic_call_context>();
2215 val->value = ctx;
2216 val->self_recursion_generated_level = same_lat_gen_level;
2217 return val;
2220 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2221 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
2222 meaning. OFFSET -1 means the source is scalar and not a part of an
2223 aggregate. If non-NULL, VAL_P records address of existing or newly added
2224 ipcp_value.
2226 If the value is generated for a self-recursive call as a result of an
2227 arithmetic pass-through jump-function acting on a value in the same lattice,
2228 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2229 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2231 template <typename valtype>
2232 bool
2233 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2234 ipcp_value<valtype> *src_val,
2235 int src_idx, HOST_WIDE_INT offset,
2236 ipcp_value<valtype> **val_p,
2237 unsigned same_lat_gen_level)
2239 ipcp_value<valtype> *val, *last_val = NULL;
2241 if (val_p)
2242 *val_p = NULL;
2244 if (bottom)
2245 return false;
2247 for (val = values; val; last_val = val, val = val->next)
2248 if (values_equal_for_ipcp_p (val->value, newval))
2250 if (val_p)
2251 *val_p = val;
2253 if (val->self_recursion_generated_level < same_lat_gen_level)
2254 val->self_recursion_generated_level = same_lat_gen_level;
2256 if (ipa_edge_within_scc (cs))
2258 ipcp_value_source<valtype> *s;
2259 for (s = val->sources; s; s = s->next)
2260 if (s->cs == cs && s->val == src_val)
2261 break;
2262 if (s)
2263 return false;
2266 val->add_source (cs, src_val, src_idx, offset);
2267 return false;
2270 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2271 param_ipa_cp_value_list_size))
2273 /* We can only free sources, not the values themselves, because sources
2274 of other values in this SCC might point to them. */
2275 for (val = values; val; val = val->next)
2277 while (val->sources)
2279 ipcp_value_source<valtype> *src = val->sources;
2280 val->sources = src->next;
2281 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2284 values = NULL;
2285 return set_to_bottom ();
2288 values_count++;
2289 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2290 val->add_source (cs, src_val, src_idx, offset);
2291 val->next = NULL;
2293 /* Add the new value to end of value list, which can reduce iterations
2294 of propagation stage for recursive function. */
2295 if (last_val)
2296 last_val->next = val;
2297 else
2298 values = val;
2300 if (val_p)
2301 *val_p = val;
2303 return true;
2306 /* A helper function that returns result of operation specified by OPCODE on
2307 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2308 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2309 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2310 the result. */
2312 static tree
2313 get_val_across_arith_op (enum tree_code opcode,
2314 tree opnd1_type,
2315 tree opnd2,
2316 ipcp_value<tree> *src_val,
2317 tree res_type)
2319 tree opnd1 = src_val->value;
2321 /* Skip source values that is incompatible with specified type. */
2322 if (opnd1_type
2323 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2324 return NULL_TREE;
2326 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2329 /* Propagate values through an arithmetic transformation described by a jump
2330 function associated with edge CS, taking values from SRC_LAT and putting
2331 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2332 OPND2 is a constant value if transformation is a binary operation.
2333 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2334 a part of the aggregate. SRC_IDX is the index of the source parameter.
2335 RES_TYPE is the value type of result being propagated into. Return true if
2336 DEST_LAT changed. */
2338 static bool
2339 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2340 enum tree_code opcode,
2341 tree opnd1_type,
2342 tree opnd2,
2343 ipcp_lattice<tree> *src_lat,
2344 ipcp_lattice<tree> *dest_lat,
2345 HOST_WIDE_INT src_offset,
2346 int src_idx,
2347 tree res_type)
2349 ipcp_value<tree> *src_val;
2350 bool ret = false;
2352 /* Due to circular dependencies, propagating within an SCC through arithmetic
2353 transformation would create infinite number of values. But for
2354 self-feeding recursive function, we could allow propagation in a limited
2355 count, and this can enable a simple kind of recursive function versioning.
2356 For other scenario, we would just make lattices bottom. */
2357 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2359 int i;
2361 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2362 param_ipa_cp_max_recursive_depth);
2363 if (src_lat != dest_lat || max_recursive_depth < 1)
2364 return dest_lat->set_contains_variable ();
2366 /* No benefit if recursive execution is in low probability. */
2367 if (cs->sreal_frequency () * 100
2368 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2369 param_ipa_cp_min_recursive_probability))
2370 return dest_lat->set_contains_variable ();
2372 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2374 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2376 /* Now we do not use self-recursively generated value as propagation
2377 source, this is absolutely conservative, but could avoid explosion
2378 of lattice's value space, especially when one recursive function
2379 calls another recursive. */
2380 if (src_val->self_recursion_generated_p ())
2382 ipcp_value_source<tree> *s;
2384 /* If the lattice has already been propagated for the call site,
2385 no need to do that again. */
2386 for (s = src_val->sources; s; s = s->next)
2387 if (s->cs == cs)
2388 return dest_lat->set_contains_variable ();
2390 else
2391 val_seeds.safe_push (src_val);
2394 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2396 /* Recursively generate lattice values with a limited count. */
2397 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2399 for (int j = 1; j < max_recursive_depth; j++)
2401 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2402 src_val, res_type);
2403 if (!cstval
2404 || !ipacp_value_safe_for_type (res_type, cstval))
2405 break;
2407 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2408 src_offset, &src_val, j);
2409 gcc_checking_assert (src_val);
2412 ret |= dest_lat->set_contains_variable ();
2414 else
2415 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2417 /* Now we do not use self-recursively generated value as propagation
2418 source, otherwise it is easy to make value space of normal lattice
2419 overflow. */
2420 if (src_val->self_recursion_generated_p ())
2422 ret |= dest_lat->set_contains_variable ();
2423 continue;
2426 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2427 src_val, res_type);
2428 if (cstval
2429 && ipacp_value_safe_for_type (res_type, cstval))
2430 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2431 src_offset);
2432 else
2433 ret |= dest_lat->set_contains_variable ();
2436 return ret;
2439 /* Propagate values through a pass-through jump function JFUNC associated with
2440 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2441 is the index of the source parameter. PARM_TYPE is the type of the
2442 parameter to which the result is passed. */
2444 static bool
2445 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2446 ipcp_lattice<tree> *src_lat,
2447 ipcp_lattice<tree> *dest_lat, int src_idx,
2448 tree parm_type)
2450 return propagate_vals_across_arith_jfunc (cs,
2451 ipa_get_jf_pass_through_operation (jfunc),
2452 NULL_TREE,
2453 ipa_get_jf_pass_through_operand (jfunc),
2454 src_lat, dest_lat, -1, src_idx, parm_type);
2457 /* Propagate values through an ancestor jump function JFUNC associated with
2458 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2459 is the index of the source parameter. */
2461 static bool
2462 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2463 struct ipa_jump_func *jfunc,
2464 ipcp_lattice<tree> *src_lat,
2465 ipcp_lattice<tree> *dest_lat, int src_idx,
2466 tree param_type)
2468 ipcp_value<tree> *src_val;
2469 bool ret = false;
2471 if (ipa_edge_within_scc (cs))
2472 return dest_lat->set_contains_variable ();
2474 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2476 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2478 if (t && ipacp_value_safe_for_type (param_type, t))
2479 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2480 else
2481 ret |= dest_lat->set_contains_variable ();
2484 return ret;
2487 /* Propagate scalar values across jump function JFUNC that is associated with
2488 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2489 parameter to which the result is passed. */
2491 static bool
2492 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2493 struct ipa_jump_func *jfunc,
2494 ipcp_lattice<tree> *dest_lat,
2495 tree param_type)
2497 if (dest_lat->bottom)
2498 return false;
2500 if (jfunc->type == IPA_JF_CONST)
2502 tree val = ipa_get_jf_constant (jfunc);
2503 if (ipacp_value_safe_for_type (param_type, val))
2504 return dest_lat->add_value (val, cs, NULL, 0);
2505 else
2506 return dest_lat->set_contains_variable ();
2508 else if (jfunc->type == IPA_JF_PASS_THROUGH
2509 || jfunc->type == IPA_JF_ANCESTOR)
2511 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2512 ipcp_lattice<tree> *src_lat;
2513 int src_idx;
2514 bool ret;
2516 if (jfunc->type == IPA_JF_PASS_THROUGH)
2517 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2518 else
2519 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2521 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2522 if (src_lat->bottom)
2523 return dest_lat->set_contains_variable ();
2525 /* If we would need to clone the caller and cannot, do not propagate. */
2526 if (!ipcp_versionable_function_p (cs->caller)
2527 && (src_lat->contains_variable
2528 || (src_lat->values_count > 1)))
2529 return dest_lat->set_contains_variable ();
2531 if (jfunc->type == IPA_JF_PASS_THROUGH)
2532 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2533 dest_lat, src_idx,
2534 param_type);
2535 else
2536 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2537 src_idx, param_type);
2539 if (src_lat->contains_variable)
2540 ret |= dest_lat->set_contains_variable ();
2542 return ret;
2545 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2546 use it for indirect inlining), we should propagate them too. */
2547 return dest_lat->set_contains_variable ();
2550 /* Propagate scalar values across jump function JFUNC that is associated with
2551 edge CS and describes argument IDX and put the values into DEST_LAT. */
2553 static bool
2554 propagate_context_across_jump_function (cgraph_edge *cs,
2555 ipa_jump_func *jfunc, int idx,
2556 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2558 if (dest_lat->bottom)
2559 return false;
2560 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2561 bool ret = false;
2562 bool added_sth = false;
2563 bool type_preserved = true;
2565 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2566 = ipa_get_ith_polymorhic_call_context (args, idx);
2568 if (edge_ctx_ptr)
2569 edge_ctx = *edge_ctx_ptr;
2571 if (jfunc->type == IPA_JF_PASS_THROUGH
2572 || jfunc->type == IPA_JF_ANCESTOR)
2574 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2575 int src_idx;
2576 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2578 /* TODO: Once we figure out how to propagate speculations, it will
2579 probably be a good idea to switch to speculation if type_preserved is
2580 not set instead of punting. */
2581 if (jfunc->type == IPA_JF_PASS_THROUGH)
2583 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2584 goto prop_fail;
2585 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2586 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2588 else
2590 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2591 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2594 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2595 /* If we would need to clone the caller and cannot, do not propagate. */
2596 if (!ipcp_versionable_function_p (cs->caller)
2597 && (src_lat->contains_variable
2598 || (src_lat->values_count > 1)))
2599 goto prop_fail;
2601 ipcp_value<ipa_polymorphic_call_context> *src_val;
2602 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2604 ipa_polymorphic_call_context cur = src_val->value;
2606 if (!type_preserved)
2607 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2608 if (jfunc->type == IPA_JF_ANCESTOR)
2609 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2610 /* TODO: In cases we know how the context is going to be used,
2611 we can improve the result by passing proper OTR_TYPE. */
2612 cur.combine_with (edge_ctx);
2613 if (!cur.useless_p ())
2615 if (src_lat->contains_variable
2616 && !edge_ctx.equal_to (cur))
2617 ret |= dest_lat->set_contains_variable ();
2618 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2619 added_sth = true;
2624 prop_fail:
2625 if (!added_sth)
2627 if (!edge_ctx.useless_p ())
2628 ret |= dest_lat->add_value (edge_ctx, cs);
2629 else
2630 ret |= dest_lat->set_contains_variable ();
2633 return ret;
2636 /* Propagate bits across jfunc that is associated with
2637 edge cs and update dest_lattice accordingly. */
2639 bool
2640 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2641 ipa_jump_func *jfunc,
2642 ipcp_bits_lattice *dest_lattice)
2644 if (dest_lattice->bottom_p ())
2645 return false;
2647 enum availability availability;
2648 cgraph_node *callee = cs->callee->function_symbol (&availability);
2649 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2650 tree parm_type = ipa_get_type (callee_info, idx);
2652 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2653 transform for these cases. Similarly, we can have bad type mismatches
2654 with LTO, avoid doing anything with those too. */
2655 if (!parm_type
2656 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2658 if (dump_file && (dump_flags & TDF_DETAILS))
2659 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2660 "param %i of %s is NULL or unsuitable for bits propagation\n",
2661 idx, cs->callee->dump_name ());
2663 return dest_lattice->set_to_bottom ();
2666 unsigned precision = TYPE_PRECISION (parm_type);
2667 signop sgn = TYPE_SIGN (parm_type);
2669 if (jfunc->type == IPA_JF_PASS_THROUGH
2670 || jfunc->type == IPA_JF_ANCESTOR)
2672 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2673 tree operand = NULL_TREE;
2674 enum tree_code code;
2675 unsigned src_idx;
2676 bool keep_null = false;
2678 if (jfunc->type == IPA_JF_PASS_THROUGH)
2680 code = ipa_get_jf_pass_through_operation (jfunc);
2681 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2682 if (code != NOP_EXPR)
2683 operand = ipa_get_jf_pass_through_operand (jfunc);
2685 else
2687 code = POINTER_PLUS_EXPR;
2688 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2689 unsigned HOST_WIDE_INT offset
2690 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2691 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2692 operand = build_int_cstu (size_type_node, offset);
2695 class ipcp_param_lattices *src_lats
2696 = ipa_get_parm_lattices (caller_info, src_idx);
2698 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2699 for eg consider:
2700 int f(int x)
2702 g (x & 0xff);
2704 Assume lattice for x is bottom, however we can still propagate
2705 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2706 and we store it in jump function during analysis stage. */
2708 if (!src_lats->bits_lattice.bottom_p ())
2710 bool drop_all_ones
2711 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2713 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2714 sgn, code, operand, drop_all_ones);
2718 if (jfunc->bits)
2719 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2720 precision);
2721 else
2722 return dest_lattice->set_to_bottom ();
2725 /* Propagate value range across jump function JFUNC that is associated with
2726 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2727 accordingly. */
2729 static bool
2730 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2731 class ipcp_param_lattices *dest_plats,
2732 tree param_type)
2734 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2736 if (dest_lat->bottom_p ())
2737 return false;
2739 if (!param_type
2740 || (!INTEGRAL_TYPE_P (param_type)
2741 && !POINTER_TYPE_P (param_type)))
2742 return dest_lat->set_to_bottom ();
2744 if (jfunc->type == IPA_JF_PASS_THROUGH)
2746 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2747 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2748 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2749 class ipcp_param_lattices *src_lats
2750 = ipa_get_parm_lattices (caller_info, src_idx);
2751 tree operand_type = ipa_get_type (caller_info, src_idx);
2753 if (src_lats->m_value_range.bottom_p ())
2754 return dest_lat->set_to_bottom ();
2756 value_range vr;
2757 if (TREE_CODE_CLASS (operation) == tcc_unary)
2758 ipa_vr_operation_and_type_effects (&vr,
2759 &src_lats->m_value_range.m_vr,
2760 operation, param_type,
2761 operand_type);
2762 /* A crude way to prevent unbounded number of value range updates
2763 in SCC components. We should allow limited number of updates within
2764 SCC, too. */
2765 else if (!ipa_edge_within_scc (cs))
2767 tree op = ipa_get_jf_pass_through_operand (jfunc);
2768 value_range op_vr;
2769 value_range op_res,res;
2770 range_op_handler handler (operation, operand_type);
2772 ipa_range_set_and_normalize (op_vr, op);
2774 if (!handler
2775 || !op_res.supports_type_p (operand_type)
2776 || !handler.fold_range (op_res, operand_type,
2777 src_lats->m_value_range.m_vr, op_vr))
2778 op_res.set_varying (operand_type);
2780 ipa_vr_operation_and_type_effects (&vr,
2781 &op_res,
2782 NOP_EXPR, param_type,
2783 operand_type);
2785 if (!vr.undefined_p () && !vr.varying_p ())
2787 if (jfunc->m_vr)
2789 value_range jvr;
2790 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2791 NOP_EXPR,
2792 param_type,
2793 jfunc->m_vr->type ()))
2794 vr.intersect (jvr);
2796 return dest_lat->meet_with (&vr);
2799 else if (jfunc->type == IPA_JF_CONST)
2801 tree val = ipa_get_jf_constant (jfunc);
2802 if (TREE_CODE (val) == INTEGER_CST)
2804 val = fold_convert (param_type, val);
2805 if (TREE_OVERFLOW_P (val))
2806 val = drop_tree_overflow (val);
2808 value_range tmpvr (TREE_TYPE (val),
2809 wi::to_wide (val), wi::to_wide (val));
2810 return dest_lat->meet_with (&tmpvr);
2814 value_range vr;
2815 if (jfunc->m_vr
2816 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2817 param_type,
2818 jfunc->m_vr->type ()))
2819 return dest_lat->meet_with (&vr);
2820 else
2821 return dest_lat->set_to_bottom ();
2824 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2825 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2826 other cases, return false). If there are no aggregate items, set
2827 aggs_by_ref to NEW_AGGS_BY_REF. */
2829 static bool
2830 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2831 bool new_aggs_by_ref)
2833 if (dest_plats->aggs)
2835 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2837 set_agg_lats_to_bottom (dest_plats);
2838 return true;
2841 else
2842 dest_plats->aggs_by_ref = new_aggs_by_ref;
2843 return false;
2846 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2847 already existing lattice for the given OFFSET and SIZE, marking all skipped
2848 lattices as containing variable and checking for overlaps. If there is no
2849 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2850 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2851 unless there are too many already. If there are two many, return false. If
2852 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2853 skipped lattices were newly marked as containing variable, set *CHANGE to
2854 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2856 static bool
2857 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2858 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2859 struct ipcp_agg_lattice ***aglat,
2860 bool pre_existing, bool *change, int max_agg_items)
2862 gcc_checking_assert (offset >= 0);
2864 while (**aglat && (**aglat)->offset < offset)
2866 if ((**aglat)->offset + (**aglat)->size > offset)
2868 set_agg_lats_to_bottom (dest_plats);
2869 return false;
2871 *change |= (**aglat)->set_contains_variable ();
2872 *aglat = &(**aglat)->next;
2875 if (**aglat && (**aglat)->offset == offset)
2877 if ((**aglat)->size != val_size)
2879 set_agg_lats_to_bottom (dest_plats);
2880 return false;
2882 gcc_assert (!(**aglat)->next
2883 || (**aglat)->next->offset >= offset + val_size);
2884 return true;
2886 else
2888 struct ipcp_agg_lattice *new_al;
2890 if (**aglat && (**aglat)->offset < offset + val_size)
2892 set_agg_lats_to_bottom (dest_plats);
2893 return false;
2895 if (dest_plats->aggs_count == max_agg_items)
2896 return false;
2897 dest_plats->aggs_count++;
2898 new_al = ipcp_agg_lattice_pool.allocate ();
2899 memset (new_al, 0, sizeof (*new_al));
2901 new_al->offset = offset;
2902 new_al->size = val_size;
2903 new_al->contains_variable = pre_existing;
2905 new_al->next = **aglat;
2906 **aglat = new_al;
2907 return true;
2911 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2912 containing an unknown value. */
2914 static bool
2915 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2917 bool ret = false;
2918 while (aglat)
2920 ret |= aglat->set_contains_variable ();
2921 aglat = aglat->next;
2923 return ret;
2926 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2927 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2928 parameter used for lattice value sources. Return true if DEST_PLATS changed
2929 in any way. */
2931 static bool
2932 merge_aggregate_lattices (struct cgraph_edge *cs,
2933 class ipcp_param_lattices *dest_plats,
2934 class ipcp_param_lattices *src_plats,
2935 int src_idx, HOST_WIDE_INT offset_delta)
2937 bool pre_existing = dest_plats->aggs != NULL;
2938 struct ipcp_agg_lattice **dst_aglat;
2939 bool ret = false;
2941 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2942 return true;
2943 if (src_plats->aggs_bottom)
2944 return set_agg_lats_contain_variable (dest_plats);
2945 if (src_plats->aggs_contain_variable)
2946 ret |= set_agg_lats_contain_variable (dest_plats);
2947 dst_aglat = &dest_plats->aggs;
2949 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2950 param_ipa_max_agg_items);
2951 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2952 src_aglat;
2953 src_aglat = src_aglat->next)
2955 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2957 if (new_offset < 0)
2958 continue;
2959 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2960 &dst_aglat, pre_existing, &ret, max_agg_items))
2962 struct ipcp_agg_lattice *new_al = *dst_aglat;
2964 dst_aglat = &(*dst_aglat)->next;
2965 if (src_aglat->bottom)
2967 ret |= new_al->set_contains_variable ();
2968 continue;
2970 if (src_aglat->contains_variable)
2971 ret |= new_al->set_contains_variable ();
2972 for (ipcp_value<tree> *val = src_aglat->values;
2973 val;
2974 val = val->next)
2975 ret |= new_al->add_value (val->value, cs, val, src_idx,
2976 src_aglat->offset);
2978 else if (dest_plats->aggs_bottom)
2979 return true;
2981 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2982 return ret;
2985 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2986 pass-through JFUNC and if so, whether it has conform and conforms to the
2987 rules about propagating values passed by reference. */
2989 static bool
2990 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2991 struct ipa_jump_func *jfunc)
2993 return src_plats->aggs
2994 && (!src_plats->aggs_by_ref
2995 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2998 /* Propagate values through ITEM, jump function for a part of an aggregate,
2999 into corresponding aggregate lattice AGLAT. CS is the call graph edge
3000 associated with the jump function. Return true if AGLAT changed in any
3001 way. */
3003 static bool
3004 propagate_aggregate_lattice (struct cgraph_edge *cs,
3005 struct ipa_agg_jf_item *item,
3006 struct ipcp_agg_lattice *aglat)
3008 class ipa_node_params *caller_info;
3009 class ipcp_param_lattices *src_plats;
3010 struct ipcp_lattice<tree> *src_lat;
3011 HOST_WIDE_INT src_offset;
3012 int src_idx;
3013 tree load_type;
3014 bool ret;
3016 if (item->jftype == IPA_JF_CONST)
3018 tree value = item->value.constant;
3020 gcc_checking_assert (is_gimple_ip_invariant (value));
3021 return aglat->add_value (value, cs, NULL, 0);
3024 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
3025 || item->jftype == IPA_JF_LOAD_AGG);
3027 caller_info = ipa_node_params_sum->get (cs->caller);
3028 src_idx = item->value.pass_through.formal_id;
3029 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3031 if (item->jftype == IPA_JF_PASS_THROUGH)
3033 load_type = NULL_TREE;
3034 src_lat = &src_plats->itself;
3035 src_offset = -1;
3037 else
3039 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3040 struct ipcp_agg_lattice *src_aglat;
3042 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3043 if (src_aglat->offset >= load_offset)
3044 break;
3046 load_type = item->value.load_agg.type;
3047 if (!src_aglat
3048 || src_aglat->offset > load_offset
3049 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3050 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3051 return aglat->set_contains_variable ();
3053 src_lat = src_aglat;
3054 src_offset = load_offset;
3057 if (src_lat->bottom
3058 || (!ipcp_versionable_function_p (cs->caller)
3059 && !src_lat->is_single_const ()))
3060 return aglat->set_contains_variable ();
3062 ret = propagate_vals_across_arith_jfunc (cs,
3063 item->value.pass_through.operation,
3064 load_type,
3065 item->value.pass_through.operand,
3066 src_lat, aglat,
3067 src_offset,
3068 src_idx,
3069 item->type);
3071 if (src_lat->contains_variable)
3072 ret |= aglat->set_contains_variable ();
3074 return ret;
3077 /* Propagate scalar values across jump function JFUNC that is associated with
3078 edge CS and put the values into DEST_LAT. */
3080 static bool
3081 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3082 struct ipa_jump_func *jfunc,
3083 class ipcp_param_lattices *dest_plats)
3085 bool ret = false;
3087 if (dest_plats->aggs_bottom)
3088 return false;
3090 if (jfunc->type == IPA_JF_PASS_THROUGH
3091 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3093 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3094 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3095 class ipcp_param_lattices *src_plats;
3097 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3098 if (agg_pass_through_permissible_p (src_plats, jfunc))
3100 /* Currently we do not produce clobber aggregate jump
3101 functions, replace with merging when we do. */
3102 gcc_assert (!jfunc->agg.items);
3103 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3104 src_idx, 0);
3105 return ret;
3108 else if (jfunc->type == IPA_JF_ANCESTOR
3109 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3111 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3112 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3113 class ipcp_param_lattices *src_plats;
3115 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3116 if (src_plats->aggs && src_plats->aggs_by_ref)
3118 /* Currently we do not produce clobber aggregate jump
3119 functions, replace with merging when we do. */
3120 gcc_assert (!jfunc->agg.items);
3121 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3122 ipa_get_jf_ancestor_offset (jfunc));
3124 else if (!src_plats->aggs_by_ref)
3125 ret |= set_agg_lats_to_bottom (dest_plats);
3126 else
3127 ret |= set_agg_lats_contain_variable (dest_plats);
3128 return ret;
3131 if (jfunc->agg.items)
3133 bool pre_existing = dest_plats->aggs != NULL;
3134 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3135 struct ipa_agg_jf_item *item;
3136 int i;
3138 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3139 return true;
3141 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3142 param_ipa_max_agg_items);
3143 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3145 HOST_WIDE_INT val_size;
3147 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3148 continue;
3149 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3151 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3152 &aglat, pre_existing, &ret, max_agg_items))
3154 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3155 aglat = &(*aglat)->next;
3157 else if (dest_plats->aggs_bottom)
3158 return true;
3161 ret |= set_chain_of_aglats_contains_variable (*aglat);
3163 else
3164 ret |= set_agg_lats_contain_variable (dest_plats);
3166 return ret;
3169 /* Return true if on the way cfrom CS->caller to the final (non-alias and
3170 non-thunk) destination, the call passes through a thunk. */
3172 static bool
3173 call_passes_through_thunk (cgraph_edge *cs)
3175 cgraph_node *alias_or_thunk = cs->callee;
3176 while (alias_or_thunk->alias)
3177 alias_or_thunk = alias_or_thunk->get_alias_target ();
3178 return alias_or_thunk->thunk;
3181 /* Propagate constants from the caller to the callee of CS. INFO describes the
3182 caller. */
3184 static bool
3185 propagate_constants_across_call (struct cgraph_edge *cs)
3187 class ipa_node_params *callee_info;
3188 enum availability availability;
3189 cgraph_node *callee;
3190 class ipa_edge_args *args;
3191 bool ret = false;
3192 int i, args_count, parms_count;
3194 callee = cs->callee->function_symbol (&availability);
3195 if (!callee->definition)
3196 return false;
3197 gcc_checking_assert (callee->has_gimple_body_p ());
3198 callee_info = ipa_node_params_sum->get (callee);
3199 if (!callee_info)
3200 return false;
3202 args = ipa_edge_args_sum->get (cs);
3203 parms_count = ipa_get_param_count (callee_info);
3204 if (parms_count == 0)
3205 return false;
3206 if (!args
3207 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3208 || !opt_for_fn (cs->caller->decl, optimize))
3210 for (i = 0; i < parms_count; i++)
3211 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3212 i));
3213 return ret;
3215 args_count = ipa_get_cs_argument_count (args);
3217 /* If this call goes through a thunk we must not propagate to the first (0th)
3218 parameter. However, we might need to uncover a thunk from below a series
3219 of aliases first. */
3220 if (call_passes_through_thunk (cs))
3222 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3223 0));
3224 i = 1;
3226 else
3227 i = 0;
3229 for (; (i < args_count) && (i < parms_count); i++)
3231 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3232 class ipcp_param_lattices *dest_plats;
3233 tree param_type = ipa_get_type (callee_info, i);
3235 dest_plats = ipa_get_parm_lattices (callee_info, i);
3236 if (availability == AVAIL_INTERPOSABLE)
3237 ret |= set_all_contains_variable (dest_plats);
3238 else
3240 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3241 &dest_plats->itself,
3242 param_type);
3243 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3244 &dest_plats->ctxlat);
3246 |= propagate_bits_across_jump_function (cs, i, jump_func,
3247 &dest_plats->bits_lattice);
3248 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3249 dest_plats);
3250 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3251 ret |= propagate_vr_across_jump_function (cs, jump_func,
3252 dest_plats, param_type);
3253 else
3254 ret |= dest_plats->m_value_range.set_to_bottom ();
3257 for (; i < parms_count; i++)
3258 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3260 return ret;
3263 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
3264 KNOWN_CONTEXTS, and known aggregates either in AVS or KNOWN_AGGS return
3265 the destination. The latter three can be NULL. If AGG_REPS is not NULL,
3266 KNOWN_AGGS is ignored. */
3268 static tree
3269 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3270 const vec<tree> &known_csts,
3271 const vec<ipa_polymorphic_call_context> &known_contexts,
3272 const ipa_argagg_value_list &avs,
3273 bool *speculative)
3275 int param_index = ie->indirect_info->param_index;
3276 HOST_WIDE_INT anc_offset;
3277 tree t = NULL;
3278 tree target = NULL;
3280 *speculative = false;
3282 if (param_index == -1)
3283 return NULL_TREE;
3285 if (!ie->indirect_info->polymorphic)
3287 tree t = NULL;
3289 if (ie->indirect_info->agg_contents)
3291 t = NULL;
3292 if ((unsigned) param_index < known_csts.length ()
3293 && known_csts[param_index])
3294 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3295 ie->indirect_info->offset,
3296 ie->indirect_info->by_ref);
3298 if (!t && ie->indirect_info->guaranteed_unmodified)
3299 t = avs.get_value (param_index,
3300 ie->indirect_info->offset / BITS_PER_UNIT,
3301 ie->indirect_info->by_ref);
3303 else if ((unsigned) param_index < known_csts.length ())
3304 t = known_csts[param_index];
3306 if (t
3307 && TREE_CODE (t) == ADDR_EXPR
3308 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3309 return TREE_OPERAND (t, 0);
3310 else
3311 return NULL_TREE;
3314 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3315 return NULL_TREE;
3317 gcc_assert (!ie->indirect_info->agg_contents);
3318 gcc_assert (!ie->indirect_info->by_ref);
3319 anc_offset = ie->indirect_info->offset;
3321 t = NULL;
3323 if ((unsigned) param_index < known_csts.length ()
3324 && known_csts[param_index])
3325 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3326 ie->indirect_info->offset, true);
3328 /* Try to work out value of virtual table pointer value in replacements. */
3329 /* or known aggregate values. */
3330 if (!t)
3331 t = avs.get_value (param_index,
3332 ie->indirect_info->offset / BITS_PER_UNIT,
3333 true);
3335 /* If we found the virtual table pointer, lookup the target. */
3336 if (t)
3338 tree vtable;
3339 unsigned HOST_WIDE_INT offset;
3340 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3342 bool can_refer;
3343 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3344 vtable, offset, &can_refer);
3345 if (can_refer)
3347 if (!target
3348 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3349 || !possible_polymorphic_call_target_p
3350 (ie, cgraph_node::get (target)))
3352 /* Do not speculate builtin_unreachable, it is stupid! */
3353 if (ie->indirect_info->vptr_changed)
3354 return NULL;
3355 target = ipa_impossible_devirt_target (ie, target);
3357 *speculative = ie->indirect_info->vptr_changed;
3358 if (!*speculative)
3359 return target;
3364 /* Do we know the constant value of pointer? */
3365 if (!t && (unsigned) param_index < known_csts.length ())
3366 t = known_csts[param_index];
3368 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3370 ipa_polymorphic_call_context context;
3371 if (known_contexts.length () > (unsigned int) param_index)
3373 context = known_contexts[param_index];
3374 context.offset_by (anc_offset);
3375 if (ie->indirect_info->vptr_changed)
3376 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3377 ie->indirect_info->otr_type);
3378 if (t)
3380 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3381 (t, ie->indirect_info->otr_type, anc_offset);
3382 if (!ctx2.useless_p ())
3383 context.combine_with (ctx2, ie->indirect_info->otr_type);
3386 else if (t)
3388 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3389 anc_offset);
3390 if (ie->indirect_info->vptr_changed)
3391 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3392 ie->indirect_info->otr_type);
3394 else
3395 return NULL_TREE;
3397 vec <cgraph_node *>targets;
3398 bool final;
3400 targets = possible_polymorphic_call_targets
3401 (ie->indirect_info->otr_type,
3402 ie->indirect_info->otr_token,
3403 context, &final);
3404 if (!final || targets.length () > 1)
3406 struct cgraph_node *node;
3407 if (*speculative)
3408 return target;
3409 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3410 || ie->speculative || !ie->maybe_hot_p ())
3411 return NULL;
3412 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3413 ie->indirect_info->otr_token,
3414 context);
3415 if (node)
3417 *speculative = true;
3418 target = node->decl;
3420 else
3421 return NULL;
3423 else
3425 *speculative = false;
3426 if (targets.length () == 1)
3427 target = targets[0]->decl;
3428 else
3429 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3432 if (target && !possible_polymorphic_call_target_p (ie,
3433 cgraph_node::get (target)))
3435 if (*speculative)
3436 return NULL;
3437 target = ipa_impossible_devirt_target (ie, target);
3440 return target;
3443 /* If an indirect edge IE can be turned into a direct one based on data in
3444 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3445 whether the discovered target is only speculative guess. */
3447 tree
3448 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3449 ipa_call_arg_values *avals,
3450 bool *speculative)
3452 ipa_argagg_value_list avl (avals);
3453 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3454 avals->m_known_contexts,
3455 avl, speculative);
3458 /* Calculate devirtualization time bonus for NODE, assuming we know information
3459 about arguments stored in AVALS. */
3461 static int
3462 devirtualization_time_bonus (struct cgraph_node *node,
3463 ipa_auto_call_arg_values *avals)
3465 struct cgraph_edge *ie;
3466 int res = 0;
3468 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3470 struct cgraph_node *callee;
3471 class ipa_fn_summary *isummary;
3472 enum availability avail;
3473 tree target;
3474 bool speculative;
3476 ipa_argagg_value_list avl (avals);
3477 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3478 avals->m_known_contexts,
3479 avl, &speculative);
3480 if (!target)
3481 continue;
3483 /* Only bare minimum benefit for clearly un-inlineable targets. */
3484 res += 1;
3485 callee = cgraph_node::get (target);
3486 if (!callee || !callee->definition)
3487 continue;
3488 callee = callee->function_symbol (&avail);
3489 if (avail < AVAIL_AVAILABLE)
3490 continue;
3491 isummary = ipa_fn_summaries->get (callee);
3492 if (!isummary || !isummary->inlinable)
3493 continue;
3495 int size = ipa_size_summaries->get (callee)->size;
3496 /* FIXME: The values below need re-considering and perhaps also
3497 integrating into the cost metrics, at lest in some very basic way. */
3498 int max_inline_insns_auto
3499 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3500 if (size <= max_inline_insns_auto / 4)
3501 res += 31 / ((int)speculative + 1);
3502 else if (size <= max_inline_insns_auto / 2)
3503 res += 15 / ((int)speculative + 1);
3504 else if (size <= max_inline_insns_auto
3505 || DECL_DECLARED_INLINE_P (callee->decl))
3506 res += 7 / ((int)speculative + 1);
3509 return res;
3512 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3514 static int
3515 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3517 int result = 0;
3518 ipa_hints hints = estimates.hints;
3519 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3520 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3522 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3524 if (hints & INLINE_HINT_loop_iterations)
3525 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3527 if (hints & INLINE_HINT_loop_stride)
3528 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3530 return result;
3533 /* If there is a reason to penalize the function described by INFO in the
3534 cloning goodness evaluation, do so. */
3536 static inline sreal
3537 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3538 sreal evaluation)
3540 if (info->node_within_scc && !info->node_is_self_scc)
3541 evaluation = (evaluation
3542 * (100 - opt_for_fn (node->decl,
3543 param_ipa_cp_recursion_penalty))) / 100;
3545 if (info->node_calling_single_call)
3546 evaluation = (evaluation
3547 * (100 - opt_for_fn (node->decl,
3548 param_ipa_cp_single_call_penalty)))
3549 / 100;
3551 return evaluation;
3554 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3555 and SIZE_COST and with the sum of frequencies of incoming edges to the
3556 potential new clone in FREQUENCIES. */
3558 static bool
3559 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3560 sreal freq_sum, profile_count count_sum,
3561 int size_cost)
3563 if (time_benefit == 0
3564 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3565 || node->optimize_for_size_p ())
3566 return false;
3568 gcc_assert (size_cost > 0);
3570 ipa_node_params *info = ipa_node_params_sum->get (node);
3571 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3572 if (count_sum.nonzero_p ())
3574 gcc_assert (base_count.nonzero_p ());
3575 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3576 sreal evaluation = (time_benefit * factor) / size_cost;
3577 evaluation = incorporate_penalties (node, info, evaluation);
3578 evaluation *= 1000;
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3582 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3583 "size: %i, count_sum: ", time_benefit.to_double (),
3584 size_cost);
3585 count_sum.dump (dump_file);
3586 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3587 info->node_within_scc
3588 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3589 info->node_calling_single_call ? ", single_call" : "",
3590 evaluation.to_double (), eval_threshold);
3593 return evaluation.to_int () >= eval_threshold;
3595 else
3597 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3598 evaluation = incorporate_penalties (node, info, evaluation);
3599 evaluation *= 1000;
3601 if (dump_file && (dump_flags & TDF_DETAILS))
3602 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3603 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3604 "threshold: %i\n",
3605 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3606 info->node_within_scc
3607 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3608 info->node_calling_single_call ? ", single_call" : "",
3609 evaluation.to_double (), eval_threshold);
3611 return evaluation.to_int () >= eval_threshold;
3615 /* Grow vectors in AVALS and fill them with information about values of
3616 parameters that are known to be independent of the context. Only calculate
3617 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3618 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3619 parameters will be stored in it.
3621 TODO: Also grow context independent value range vectors. */
3623 static bool
3624 gather_context_independent_values (class ipa_node_params *info,
3625 ipa_auto_call_arg_values *avals,
3626 bool calculate_aggs,
3627 int *removable_params_cost)
3629 int i, count = ipa_get_param_count (info);
3630 bool ret = false;
3632 avals->m_known_vals.safe_grow_cleared (count, true);
3633 avals->m_known_contexts.safe_grow_cleared (count, true);
3635 if (removable_params_cost)
3636 *removable_params_cost = 0;
3638 for (i = 0; i < count; i++)
3640 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3641 ipcp_lattice<tree> *lat = &plats->itself;
3643 if (lat->is_single_const ())
3645 ipcp_value<tree> *val = lat->values;
3646 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3647 avals->m_known_vals[i] = val->value;
3648 if (removable_params_cost)
3649 *removable_params_cost
3650 += estimate_move_cost (TREE_TYPE (val->value), false);
3651 ret = true;
3653 else if (removable_params_cost
3654 && !ipa_is_param_used (info, i))
3655 *removable_params_cost
3656 += ipa_get_param_move_cost (info, i);
3658 if (!ipa_is_param_used (info, i))
3659 continue;
3661 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3662 /* Do not account known context as reason for cloning. We can see
3663 if it permits devirtualization. */
3664 if (ctxlat->is_single_const ())
3665 avals->m_known_contexts[i] = ctxlat->values->value;
3667 if (calculate_aggs)
3668 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3671 return ret;
3674 /* Perform time and size measurement of NODE with the context given in AVALS,
3675 calculate the benefit compared to the node without specialization and store
3676 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3677 context-independent or unused removable parameters and EST_MOVE_COST, the
3678 estimated movement of the considered parameter. */
3680 static void
3681 perform_estimation_of_a_value (cgraph_node *node,
3682 ipa_auto_call_arg_values *avals,
3683 int removable_params_cost, int est_move_cost,
3684 ipcp_value_base *val)
3686 sreal time_benefit;
3687 ipa_call_estimates estimates;
3689 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3691 /* Extern inline functions have no cloning local time benefits because they
3692 will be inlined anyway. The only reason to clone them is if it enables
3693 optimization in any of the functions they call. */
3694 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3695 time_benefit = 0;
3696 else
3697 time_benefit = (estimates.nonspecialized_time - estimates.time)
3698 + (devirtualization_time_bonus (node, avals)
3699 + hint_time_bonus (node, estimates)
3700 + removable_params_cost + est_move_cost);
3702 int size = estimates.size;
3703 gcc_checking_assert (size >=0);
3704 /* The inliner-heuristics based estimates may think that in certain
3705 contexts some functions do not have any size at all but we want
3706 all specializations to have at least a tiny cost, not least not to
3707 divide by zero. */
3708 if (size == 0)
3709 size = 1;
3711 val->local_time_benefit = time_benefit;
3712 val->local_size_cost = size;
3715 /* Get the overall limit oof growth based on parameters extracted from growth.
3716 it does not really make sense to mix functions with different overall growth
3717 limits but it is possible and if it happens, we do not want to select one
3718 limit at random. */
3720 static long
3721 get_max_overall_size (cgraph_node *node)
3723 long max_new_size = orig_overall_size;
3724 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3725 if (max_new_size < large_unit)
3726 max_new_size = large_unit;
3727 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3728 max_new_size += max_new_size * unit_growth / 100 + 1;
3729 return max_new_size;
3732 /* Return true if NODE should be cloned just for a parameter removal, possibly
3733 dumping a reason if not. */
3735 static bool
3736 clone_for_param_removal_p (cgraph_node *node)
3738 if (!node->can_change_signature)
3740 if (dump_file && (dump_flags & TDF_DETAILS))
3741 fprintf (dump_file, " Not considering cloning to remove parameters, "
3742 "function cannot change signature.\n");
3743 return false;
3745 if (node->can_be_local_p ())
3747 if (dump_file && (dump_flags & TDF_DETAILS))
3748 fprintf (dump_file, " Not considering cloning to remove parameters, "
3749 "IPA-SRA can do it potentially better.\n");
3750 return false;
3752 return true;
3755 /* Iterate over known values of parameters of NODE and estimate the local
3756 effects in terms of time and size they have. */
3758 static void
3759 estimate_local_effects (struct cgraph_node *node)
3761 ipa_node_params *info = ipa_node_params_sum->get (node);
3762 int count = ipa_get_param_count (info);
3763 bool always_const;
3764 int removable_params_cost;
3766 if (!count || !ipcp_versionable_function_p (node))
3767 return;
3769 if (dump_file && (dump_flags & TDF_DETAILS))
3770 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3772 ipa_auto_call_arg_values avals;
3773 always_const = gather_context_independent_values (info, &avals, true,
3774 &removable_params_cost);
3775 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3776 if (always_const || devirt_bonus
3777 || (removable_params_cost && clone_for_param_removal_p (node)))
3779 struct caller_statistics stats;
3780 ipa_call_estimates estimates;
3782 init_caller_stats (&stats);
3783 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3784 false);
3785 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3786 sreal time = estimates.nonspecialized_time - estimates.time;
3787 time += devirt_bonus;
3788 time += hint_time_bonus (node, estimates);
3789 time += removable_params_cost;
3790 int size = estimates.size - stats.n_calls * removable_params_cost;
3792 if (dump_file)
3793 fprintf (dump_file, " - context independent values, size: %i, "
3794 "time_benefit: %f\n", size, (time).to_double ());
3796 if (size <= 0 || node->local)
3798 info->do_clone_for_all_contexts = true;
3800 if (dump_file)
3801 fprintf (dump_file, " Decided to specialize for all "
3802 "known contexts, code not going to grow.\n");
3804 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3805 stats.count_sum, size))
3807 if (size + overall_size <= get_max_overall_size (node))
3809 info->do_clone_for_all_contexts = true;
3810 overall_size += size;
3812 if (dump_file)
3813 fprintf (dump_file, " Decided to specialize for all "
3814 "known contexts, growth (to %li) deemed "
3815 "beneficial.\n", overall_size);
3817 else if (dump_file && (dump_flags & TDF_DETAILS))
3818 fprintf (dump_file, " Not cloning for all contexts because "
3819 "maximum unit size would be reached with %li.\n",
3820 size + overall_size);
3822 else if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, " Not cloning for all contexts because "
3824 "!good_cloning_opportunity_p.\n");
3828 for (int i = 0; i < count; i++)
3830 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3831 ipcp_lattice<tree> *lat = &plats->itself;
3832 ipcp_value<tree> *val;
3834 if (lat->bottom
3835 || !lat->values
3836 || avals.m_known_vals[i])
3837 continue;
3839 for (val = lat->values; val; val = val->next)
3841 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3842 avals.m_known_vals[i] = val->value;
3844 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3845 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3846 emc, val);
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3850 fprintf (dump_file, " - estimates for value ");
3851 print_ipcp_constant_value (dump_file, val->value);
3852 fprintf (dump_file, " for ");
3853 ipa_dump_param (dump_file, info, i);
3854 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3855 val->local_time_benefit.to_double (),
3856 val->local_size_cost);
3859 avals.m_known_vals[i] = NULL_TREE;
3862 for (int i = 0; i < count; i++)
3864 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3866 if (!plats->virt_call)
3867 continue;
3869 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3870 ipcp_value<ipa_polymorphic_call_context> *val;
3872 if (ctxlat->bottom
3873 || !ctxlat->values
3874 || !avals.m_known_contexts[i].useless_p ())
3875 continue;
3877 for (val = ctxlat->values; val; val = val->next)
3879 avals.m_known_contexts[i] = val->value;
3880 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3881 0, val);
3883 if (dump_file && (dump_flags & TDF_DETAILS))
3885 fprintf (dump_file, " - estimates for polymorphic context ");
3886 print_ipcp_constant_value (dump_file, val->value);
3887 fprintf (dump_file, " for ");
3888 ipa_dump_param (dump_file, info, i);
3889 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3890 val->local_time_benefit.to_double (),
3891 val->local_size_cost);
3894 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3897 unsigned all_ctx_len = avals.m_known_aggs.length ();
3898 auto_vec<ipa_argagg_value, 32> all_ctx;
3899 all_ctx.reserve_exact (all_ctx_len);
3900 all_ctx.splice (avals.m_known_aggs);
3901 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3903 unsigned j = 0;
3904 for (int index = 0; index < count; index++)
3906 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3908 if (plats->aggs_bottom || !plats->aggs)
3909 continue;
3911 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3913 ipcp_value<tree> *val;
3914 if (aglat->bottom || !aglat->values
3915 /* If the following is true, the one value is already part of all
3916 context estimations. */
3917 || (!plats->aggs_contain_variable
3918 && aglat->is_single_const ()))
3919 continue;
3921 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3922 while (j < all_ctx_len
3923 && (all_ctx[j].index < index
3924 || (all_ctx[j].index == index
3925 && all_ctx[j].unit_offset < unit_offset)))
3927 avals.m_known_aggs[j] = all_ctx[j];
3928 j++;
3931 for (unsigned k = j; k < all_ctx_len; k++)
3932 avals.m_known_aggs[k+1] = all_ctx[k];
3934 for (val = aglat->values; val; val = val->next)
3936 avals.m_known_aggs[j].value = val->value;
3937 avals.m_known_aggs[j].unit_offset = unit_offset;
3938 avals.m_known_aggs[j].index = index;
3939 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3941 perform_estimation_of_a_value (node, &avals,
3942 removable_params_cost, 0, val);
3944 if (dump_file && (dump_flags & TDF_DETAILS))
3946 fprintf (dump_file, " - estimates for value ");
3947 print_ipcp_constant_value (dump_file, val->value);
3948 fprintf (dump_file, " for ");
3949 ipa_dump_param (dump_file, info, index);
3950 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3951 "]: time_benefit: %g, size: %i\n",
3952 plats->aggs_by_ref ? "ref " : "",
3953 aglat->offset,
3954 val->local_time_benefit.to_double (),
3955 val->local_size_cost);
3963 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3964 topological sort of values. */
3966 template <typename valtype>
3967 void
3968 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3970 ipcp_value_source<valtype> *src;
3972 if (cur_val->dfs)
3973 return;
3975 dfs_counter++;
3976 cur_val->dfs = dfs_counter;
3977 cur_val->low_link = dfs_counter;
3979 cur_val->topo_next = stack;
3980 stack = cur_val;
3981 cur_val->on_stack = true;
3983 for (src = cur_val->sources; src; src = src->next)
3984 if (src->val)
3986 if (src->val->dfs == 0)
3988 add_val (src->val);
3989 if (src->val->low_link < cur_val->low_link)
3990 cur_val->low_link = src->val->low_link;
3992 else if (src->val->on_stack
3993 && src->val->dfs < cur_val->low_link)
3994 cur_val->low_link = src->val->dfs;
3997 if (cur_val->dfs == cur_val->low_link)
3999 ipcp_value<valtype> *v, *scc_list = NULL;
4003 v = stack;
4004 stack = v->topo_next;
4005 v->on_stack = false;
4006 v->scc_no = cur_val->dfs;
4008 v->scc_next = scc_list;
4009 scc_list = v;
4011 while (v != cur_val);
4013 cur_val->topo_next = values_topo;
4014 values_topo = cur_val;
4018 /* Add all values in lattices associated with NODE to the topological sort if
4019 they are not there yet. */
4021 static void
4022 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
4024 ipa_node_params *info = ipa_node_params_sum->get (node);
4025 int i, count = ipa_get_param_count (info);
4027 for (i = 0; i < count; i++)
4029 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
4030 ipcp_lattice<tree> *lat = &plats->itself;
4031 struct ipcp_agg_lattice *aglat;
4033 if (!lat->bottom)
4035 ipcp_value<tree> *val;
4036 for (val = lat->values; val; val = val->next)
4037 topo->constants.add_val (val);
4040 if (!plats->aggs_bottom)
4041 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4042 if (!aglat->bottom)
4044 ipcp_value<tree> *val;
4045 for (val = aglat->values; val; val = val->next)
4046 topo->constants.add_val (val);
4049 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
4050 if (!ctxlat->bottom)
4052 ipcp_value<ipa_polymorphic_call_context> *ctxval;
4053 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
4054 topo->contexts.add_val (ctxval);
4059 /* One pass of constants propagation along the call graph edges, from callers
4060 to callees (requires topological ordering in TOPO), iterate over strongly
4061 connected components. */
4063 static void
4064 propagate_constants_topo (class ipa_topo_info *topo)
4066 int i;
4068 for (i = topo->nnodes - 1; i >= 0; i--)
4070 unsigned j;
4071 struct cgraph_node *v, *node = topo->order[i];
4072 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
4074 /* First, iteratively propagate within the strongly connected component
4075 until all lattices stabilize. */
4076 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4077 if (v->has_gimple_body_p ())
4079 if (opt_for_fn (v->decl, flag_ipa_cp)
4080 && opt_for_fn (v->decl, optimize))
4081 push_node_to_stack (topo, v);
4082 /* When V is not optimized, we can not push it to stack, but
4083 still we need to set all its callees lattices to bottom. */
4084 else
4086 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4087 propagate_constants_across_call (cs);
4091 v = pop_node_from_stack (topo);
4092 while (v)
4094 struct cgraph_edge *cs;
4095 class ipa_node_params *info = NULL;
4096 bool self_scc = true;
4098 for (cs = v->callees; cs; cs = cs->next_callee)
4099 if (ipa_edge_within_scc (cs))
4101 cgraph_node *callee = cs->callee->function_symbol ();
4103 if (v != callee)
4104 self_scc = false;
4106 if (!info)
4108 info = ipa_node_params_sum->get (v);
4109 info->node_within_scc = true;
4112 if (propagate_constants_across_call (cs))
4113 push_node_to_stack (topo, callee);
4116 if (info)
4117 info->node_is_self_scc = self_scc;
4119 v = pop_node_from_stack (topo);
4122 /* Afterwards, propagate along edges leading out of the SCC, calculates
4123 the local effects of the discovered constants and all valid values to
4124 their topological sort. */
4125 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4126 if (v->has_gimple_body_p ()
4127 && opt_for_fn (v->decl, flag_ipa_cp)
4128 && opt_for_fn (v->decl, optimize))
4130 struct cgraph_edge *cs;
4132 estimate_local_effects (v);
4133 add_all_node_vals_to_toposort (v, topo);
4134 for (cs = v->callees; cs; cs = cs->next_callee)
4135 if (!ipa_edge_within_scc (cs))
4136 propagate_constants_across_call (cs);
4138 cycle_nodes.release ();
4142 /* Propagate the estimated effects of individual values along the topological
4143 from the dependent values to those they depend on. */
4145 template <typename valtype>
4146 void
4147 value_topo_info<valtype>::propagate_effects ()
4149 ipcp_value<valtype> *base;
4150 hash_set<ipcp_value<valtype> *> processed_srcvals;
4152 for (base = values_topo; base; base = base->topo_next)
4154 ipcp_value_source<valtype> *src;
4155 ipcp_value<valtype> *val;
4156 sreal time = 0;
4157 HOST_WIDE_INT size = 0;
4159 for (val = base; val; val = val->scc_next)
4161 time = time + val->local_time_benefit + val->prop_time_benefit;
4162 size = size + val->local_size_cost + val->prop_size_cost;
4165 for (val = base; val; val = val->scc_next)
4167 processed_srcvals.empty ();
4168 for (src = val->sources; src; src = src->next)
4169 if (src->val
4170 && src->cs->maybe_hot_p ())
4172 if (!processed_srcvals.add (src->val))
4174 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4175 if (prop_size < INT_MAX)
4176 src->val->prop_size_cost = prop_size;
4177 else
4178 continue;
4181 int special_factor = 1;
4182 if (val->same_scc (src->val))
4183 special_factor
4184 = opt_for_fn(src->cs->caller->decl,
4185 param_ipa_cp_recursive_freq_factor);
4186 else if (val->self_recursion_generated_p ()
4187 && (src->cs->callee->function_symbol ()
4188 == src->cs->caller))
4190 int max_recur_gen_depth
4191 = opt_for_fn(src->cs->caller->decl,
4192 param_ipa_cp_max_recursive_depth);
4193 special_factor = max_recur_gen_depth
4194 - val->self_recursion_generated_level + 1;
4197 src->val->prop_time_benefit
4198 += time * special_factor * src->cs->sreal_frequency ();
4201 if (size < INT_MAX)
4203 val->prop_time_benefit = time;
4204 val->prop_size_cost = size;
4206 else
4208 val->prop_time_benefit = 0;
4209 val->prop_size_cost = 0;
4215 /* Callback for qsort to sort counts of all edges. */
4217 static int
4218 compare_edge_profile_counts (const void *a, const void *b)
4220 const profile_count *cnt1 = (const profile_count *) a;
4221 const profile_count *cnt2 = (const profile_count *) b;
4223 if (*cnt1 < *cnt2)
4224 return 1;
4225 if (*cnt1 > *cnt2)
4226 return -1;
4227 return 0;
4231 /* Propagate constants, polymorphic contexts and their effects from the
4232 summaries interprocedurally. */
4234 static void
4235 ipcp_propagate_stage (class ipa_topo_info *topo)
4237 struct cgraph_node *node;
4239 if (dump_file)
4240 fprintf (dump_file, "\n Propagating constants:\n\n");
4242 base_count = profile_count::uninitialized ();
4244 bool compute_count_base = false;
4245 unsigned base_count_pos_percent = 0;
4246 FOR_EACH_DEFINED_FUNCTION (node)
4248 if (node->has_gimple_body_p ()
4249 && opt_for_fn (node->decl, flag_ipa_cp)
4250 && opt_for_fn (node->decl, optimize))
4252 ipa_node_params *info = ipa_node_params_sum->get (node);
4253 determine_versionability (node, info);
4255 unsigned nlattices = ipa_get_param_count (info);
4256 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4257 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4258 initialize_node_lattices (node);
4260 ipa_size_summary *s = ipa_size_summaries->get (node);
4261 if (node->definition && !node->alias && s != NULL)
4262 overall_size += s->self_size;
4263 if (node->count.ipa ().initialized_p ())
4265 compute_count_base = true;
4266 unsigned pos_percent = opt_for_fn (node->decl,
4267 param_ipa_cp_profile_count_base);
4268 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4272 if (compute_count_base)
4274 auto_vec<profile_count> all_edge_counts;
4275 all_edge_counts.reserve_exact (symtab->edges_count);
4276 FOR_EACH_DEFINED_FUNCTION (node)
4277 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4279 profile_count count = cs->count.ipa ();
4280 if (!count.nonzero_p ())
4281 continue;
4283 enum availability avail;
4284 cgraph_node *tgt
4285 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4286 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4287 if (info && info->versionable)
4288 all_edge_counts.quick_push (count);
4291 if (!all_edge_counts.is_empty ())
4293 gcc_assert (base_count_pos_percent <= 100);
4294 all_edge_counts.qsort (compare_edge_profile_counts);
4296 unsigned base_count_pos
4297 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4298 base_count = all_edge_counts[base_count_pos];
4300 if (dump_file)
4302 fprintf (dump_file, "\nSelected base_count from %u edges at "
4303 "position %u, arriving at: ", all_edge_counts.length (),
4304 base_count_pos);
4305 base_count.dump (dump_file);
4306 fprintf (dump_file, "\n");
4309 else if (dump_file)
4310 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4311 "continuing as if without profile feedback.\n");
4314 orig_overall_size = overall_size;
4316 if (dump_file)
4317 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4319 propagate_constants_topo (topo);
4320 if (flag_checking)
4321 ipcp_verify_propagated_values ();
4322 topo->constants.propagate_effects ();
4323 topo->contexts.propagate_effects ();
4325 if (dump_file)
4327 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4328 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4332 /* Discover newly direct outgoing edges from NODE which is a new clone with
4333 known KNOWN_CSTS and make them direct. */
4335 static void
4336 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4337 vec<tree> known_csts,
4338 vec<ipa_polymorphic_call_context>
4339 known_contexts,
4340 vec<ipa_argagg_value, va_gc> *aggvals)
4342 struct cgraph_edge *ie, *next_ie;
4343 bool found = false;
4345 for (ie = node->indirect_calls; ie; ie = next_ie)
4347 tree target;
4348 bool speculative;
4350 next_ie = ie->next_callee;
4351 ipa_argagg_value_list avs (aggvals);
4352 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4353 avs, &speculative);
4354 if (target)
4356 bool agg_contents = ie->indirect_info->agg_contents;
4357 bool polymorphic = ie->indirect_info->polymorphic;
4358 int param_index = ie->indirect_info->param_index;
4359 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4360 speculative);
4361 found = true;
4363 if (cs && !agg_contents && !polymorphic)
4365 ipa_node_params *info = ipa_node_params_sum->get (node);
4366 int c = ipa_get_controlled_uses (info, param_index);
4367 if (c != IPA_UNDESCRIBED_USE
4368 && !ipa_get_param_load_dereferenced (info, param_index))
4370 struct ipa_ref *to_del;
4372 c--;
4373 ipa_set_controlled_uses (info, param_index, c);
4374 if (dump_file && (dump_flags & TDF_DETAILS))
4375 fprintf (dump_file, " controlled uses count of param "
4376 "%i bumped down to %i\n", param_index, c);
4377 if (c == 0
4378 && (to_del = node->find_reference (cs->callee, NULL, 0,
4379 IPA_REF_ADDR)))
4381 if (dump_file && (dump_flags & TDF_DETAILS))
4382 fprintf (dump_file, " and even removing its "
4383 "cloning-created reference\n");
4384 to_del->remove_reference ();
4390 /* Turning calls to direct calls will improve overall summary. */
4391 if (found)
4392 ipa_update_overall_fn_summary (node);
4395 class edge_clone_summary;
4396 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4398 /* Edge clone summary. */
4400 class edge_clone_summary
4402 public:
4403 /* Default constructor. */
4404 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4406 /* Default destructor. */
4407 ~edge_clone_summary ()
4409 if (prev_clone)
4410 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4411 if (next_clone)
4412 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4415 cgraph_edge *prev_clone;
4416 cgraph_edge *next_clone;
4419 class edge_clone_summary_t:
4420 public call_summary <edge_clone_summary *>
4422 public:
4423 edge_clone_summary_t (symbol_table *symtab):
4424 call_summary <edge_clone_summary *> (symtab)
4426 m_initialize_when_cloning = true;
4429 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4430 edge_clone_summary *src_data,
4431 edge_clone_summary *dst_data) final override;
4434 /* Edge duplication hook. */
4436 void
4437 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4438 edge_clone_summary *src_data,
4439 edge_clone_summary *dst_data)
4441 if (src_data->next_clone)
4442 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4443 dst_data->prev_clone = src_edge;
4444 dst_data->next_clone = src_data->next_clone;
4445 src_data->next_clone = dst_edge;
4448 /* Return true is CS calls DEST or its clone for all contexts. When
4449 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4450 edges from/to an all-context clone. */
4452 static bool
4453 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4454 bool allow_recursion_to_clone)
4456 enum availability availability;
4457 cgraph_node *callee = cs->callee->function_symbol (&availability);
4459 if (availability <= AVAIL_INTERPOSABLE)
4460 return false;
4461 if (callee == dest)
4462 return true;
4463 if (!allow_recursion_to_clone && cs->caller == callee)
4464 return false;
4466 ipa_node_params *info = ipa_node_params_sum->get (callee);
4467 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4470 /* Return true if edge CS does bring about the value described by SRC to
4471 DEST_VAL of node DEST or its clone for all contexts. */
4473 static bool
4474 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4475 cgraph_node *dest, ipcp_value<tree> *dest_val)
4477 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4479 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4480 || caller_info->node_dead)
4481 return false;
4483 if (!src->val)
4484 return true;
4486 if (caller_info->ipcp_orig_node)
4488 tree t = NULL_TREE;
4489 if (src->offset == -1)
4490 t = caller_info->known_csts[src->index];
4491 else if (ipcp_transformation *ts
4492 = ipcp_get_transformation_summary (cs->caller))
4494 ipa_argagg_value_list avl (ts);
4495 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4497 return (t != NULL_TREE
4498 && values_equal_for_ipcp_p (src->val->value, t));
4500 else
4502 if (src->val == dest_val)
4503 return true;
4505 struct ipcp_agg_lattice *aglat;
4506 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4507 src->index);
4508 if (src->offset == -1)
4509 return (plats->itself.is_single_const ()
4510 && values_equal_for_ipcp_p (src->val->value,
4511 plats->itself.values->value));
4512 else
4514 if (plats->aggs_bottom || plats->aggs_contain_variable)
4515 return false;
4516 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4517 if (aglat->offset == src->offset)
4518 return (aglat->is_single_const ()
4519 && values_equal_for_ipcp_p (src->val->value,
4520 aglat->values->value));
4522 return false;
4526 /* Return true if edge CS does bring about the value described by SRC to
4527 DST_VAL of node DEST or its clone for all contexts. */
4529 static bool
4530 cgraph_edge_brings_value_p (cgraph_edge *cs,
4531 ipcp_value_source<ipa_polymorphic_call_context> *src,
4532 cgraph_node *dest,
4533 ipcp_value<ipa_polymorphic_call_context> *)
4535 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4537 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4538 || caller_info->node_dead)
4539 return false;
4540 if (!src->val)
4541 return true;
4543 if (caller_info->ipcp_orig_node)
4544 return (caller_info->known_contexts.length () > (unsigned) src->index)
4545 && values_equal_for_ipcp_p (src->val->value,
4546 caller_info->known_contexts[src->index]);
4548 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4549 src->index);
4550 return plats->ctxlat.is_single_const ()
4551 && values_equal_for_ipcp_p (src->val->value,
4552 plats->ctxlat.values->value);
4555 /* Get the next clone in the linked list of clones of an edge. */
4557 static inline struct cgraph_edge *
4558 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4560 edge_clone_summary *s = edge_clone_summaries->get (cs);
4561 return s != NULL ? s->next_clone : NULL;
4564 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4565 of them is viable and hot, return true. In that case, for those that still
4566 hold, add their edge frequency and their number and cumulative profile
4567 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4568 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4570 template <typename valtype>
4571 static bool
4572 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4573 sreal *freq_sum, int *caller_count,
4574 profile_count *rec_count_sum,
4575 profile_count *nonrec_count_sum)
4577 ipcp_value_source<valtype> *src;
4578 sreal freq = 0;
4579 int count = 0;
4580 profile_count rec_cnt = profile_count::zero ();
4581 profile_count nonrec_cnt = profile_count::zero ();
4582 bool hot = false;
4583 bool non_self_recursive = false;
4585 for (src = val->sources; src; src = src->next)
4587 struct cgraph_edge *cs = src->cs;
4588 while (cs)
4590 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4592 count++;
4593 freq += cs->sreal_frequency ();
4594 hot |= cs->maybe_hot_p ();
4595 if (cs->caller != dest)
4597 non_self_recursive = true;
4598 if (cs->count.ipa ().initialized_p ())
4599 rec_cnt += cs->count.ipa ();
4601 else if (cs->count.ipa ().initialized_p ())
4602 nonrec_cnt += cs->count.ipa ();
4604 cs = get_next_cgraph_edge_clone (cs);
4608 /* If the only edges bringing a value are self-recursive ones, do not bother
4609 evaluating it. */
4610 if (!non_self_recursive)
4611 return false;
4613 *freq_sum = freq;
4614 *caller_count = count;
4615 *rec_count_sum = rec_cnt;
4616 *nonrec_count_sum = nonrec_cnt;
4618 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4620 struct cgraph_edge *cs;
4622 /* Cold non-SCC source edge could trigger hot recursive execution of
4623 function. Consider the case as hot and rely on following cost model
4624 computation to further select right one. */
4625 for (cs = dest->callers; cs; cs = cs->next_caller)
4626 if (cs->caller == dest && cs->maybe_hot_p ())
4627 return true;
4630 return hot;
4633 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4634 to let a non-self-recursive caller be the first element. Thus, we can
4635 simplify intersecting operations on values that arrive from all of these
4636 callers, especially when there exists self-recursive call. Return true if
4637 this kind of adjustment is possible. */
4639 static bool
4640 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4641 cgraph_node *node)
4643 for (unsigned i = 0; i < callers.length (); i++)
4645 cgraph_edge *cs = callers[i];
4647 if (cs->caller != node)
4649 if (i > 0)
4651 callers[i] = callers[0];
4652 callers[0] = cs;
4654 return true;
4657 return false;
4660 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4661 is assumed their number is known and equal to CALLER_COUNT. */
4663 template <typename valtype>
4664 static vec<cgraph_edge *>
4665 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4666 int caller_count)
4668 ipcp_value_source<valtype> *src;
4669 vec<cgraph_edge *> ret;
4671 ret.create (caller_count);
4672 for (src = val->sources; src; src = src->next)
4674 struct cgraph_edge *cs = src->cs;
4675 while (cs)
4677 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4678 ret.quick_push (cs);
4679 cs = get_next_cgraph_edge_clone (cs);
4683 if (caller_count > 1)
4684 adjust_callers_for_value_intersection (ret, dest);
4686 return ret;
4689 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4690 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4691 should be set to true when the reference created for the constant should be
4692 a load one and not an address one because the corresponding parameter p is
4693 only used as *p. */
4695 static struct ipa_replace_map *
4696 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4697 bool force_load_ref)
4699 struct ipa_replace_map *replace_map;
4701 replace_map = ggc_alloc<ipa_replace_map> ();
4702 if (dump_file)
4704 fprintf (dump_file, " replacing ");
4705 ipa_dump_param (dump_file, info, parm_num);
4707 fprintf (dump_file, " with const ");
4708 print_generic_expr (dump_file, value);
4710 if (force_load_ref)
4711 fprintf (dump_file, " - forcing load reference\n");
4712 else
4713 fprintf (dump_file, "\n");
4715 replace_map->parm_num = parm_num;
4716 replace_map->new_tree = value;
4717 replace_map->force_load_ref = force_load_ref;
4718 return replace_map;
4721 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4722 one, otherwise it will be referred to as the original node. */
4724 static void
4725 dump_profile_updates (cgraph_node *node, bool spec)
4727 if (spec)
4728 fprintf (dump_file, " setting count of the specialized node %s to ",
4729 node->dump_name ());
4730 else
4731 fprintf (dump_file, " setting count of the original node %s to ",
4732 node->dump_name ());
4734 node->count.dump (dump_file);
4735 fprintf (dump_file, "\n");
4736 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4738 fprintf (dump_file, " edge to %s has count ",
4739 cs->callee->dump_name ());
4740 cs->count.dump (dump_file);
4741 fprintf (dump_file, "\n");
4745 /* With partial train run we do not want to assume that original's count is
4746 zero whenever we redurect all executed edges to clone. Simply drop profile
4747 to local one in this case. In eany case, return the new value. ORIG_NODE
4748 is the original node and its count has not been updaed yet. */
4750 profile_count
4751 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4753 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4754 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4755 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4756 remainder = remainder.guessed_local ();
4758 return remainder;
4761 /* Structure to sum counts coming from nodes other than the original node and
4762 its clones. */
4764 struct gather_other_count_struct
4766 cgraph_node *orig;
4767 profile_count other_count;
4770 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4771 counts that come from non-self-recursive calls.. */
4773 static bool
4774 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4776 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4777 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4778 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4779 desc->other_count += cs->count.ipa ();
4780 return false;
4783 /* Structure to help analyze if we need to boost counts of some clones of some
4784 non-recursive edges to match the new callee count. */
4786 struct desc_incoming_count_struct
4788 cgraph_node *orig;
4789 hash_set <cgraph_edge *> *processed_edges;
4790 profile_count count;
4791 unsigned unproc_orig_rec_edges;
4794 /* Go over edges calling NODE and its thunks and gather information about
4795 incoming counts so that we know if we need to make any adjustments. */
4797 static void
4798 analyze_clone_icoming_counts (cgraph_node *node,
4799 desc_incoming_count_struct *desc)
4801 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4802 if (cs->caller->thunk)
4804 analyze_clone_icoming_counts (cs->caller, desc);
4805 continue;
4807 else
4809 if (cs->count.initialized_p ())
4810 desc->count += cs->count.ipa ();
4811 if (!desc->processed_edges->contains (cs)
4812 && cs->caller->clone_of == desc->orig)
4813 desc->unproc_orig_rec_edges++;
4817 /* If caller edge counts of a clone created for a self-recursive arithmetic
4818 jump function must be adjusted because it is coming from a the "seed" clone
4819 for the first value and so has been excessively scaled back as if it was not
4820 a recursive call, adjust it so that the incoming counts of NODE match its
4821 count. NODE is the node or its thunk. */
4823 static void
4824 adjust_clone_incoming_counts (cgraph_node *node,
4825 desc_incoming_count_struct *desc)
4827 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4828 if (cs->caller->thunk)
4830 adjust_clone_incoming_counts (cs->caller, desc);
4831 profile_count sum = profile_count::zero ();
4832 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4833 if (e->count.initialized_p ())
4834 sum += e->count.ipa ();
4835 cs->count = cs->count.combine_with_ipa_count (sum);
4837 else if (!desc->processed_edges->contains (cs)
4838 && cs->caller->clone_of == desc->orig)
4840 cs->count += desc->count;
4841 if (dump_file)
4843 fprintf (dump_file, " Adjusted count of an incoming edge of "
4844 "a clone %s -> %s to ", cs->caller->dump_name (),
4845 cs->callee->dump_name ());
4846 cs->count.dump (dump_file);
4847 fprintf (dump_file, "\n");
4852 /* When ORIG_NODE has been cloned for values which have been generated fora
4853 self-recursive call as a result of an arithmetic pass-through
4854 jump-functions, adjust its count together with counts of all such clones in
4855 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4857 The function sums the counts of the original node and all its clones that
4858 cannot be attributed to a specific clone because it comes from a
4859 non-recursive edge. This sum is then evenly divided between the clones and
4860 on top of that each one gets all the counts which can be attributed directly
4861 to it. */
4863 static void
4864 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4865 const vec<cgraph_node *> &self_gen_clones)
4867 profile_count redist_sum = orig_node->count.ipa ();
4868 if (!(redist_sum > profile_count::zero ()))
4869 return;
4871 if (dump_file)
4872 fprintf (dump_file, " Updating profile of self recursive clone "
4873 "series\n");
4875 gather_other_count_struct gocs;
4876 gocs.orig = orig_node;
4877 gocs.other_count = profile_count::zero ();
4879 auto_vec <profile_count, 8> other_edges_count;
4880 for (cgraph_node *n : self_gen_clones)
4882 gocs.other_count = profile_count::zero ();
4883 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4884 &gocs, false);
4885 other_edges_count.safe_push (gocs.other_count);
4886 redist_sum -= gocs.other_count;
4889 hash_set<cgraph_edge *> processed_edges;
4890 unsigned i = 0;
4891 for (cgraph_node *n : self_gen_clones)
4893 profile_count orig_count = n->count;
4894 profile_count new_count
4895 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4896 new_count = lenient_count_portion_handling (new_count, orig_node);
4897 n->count = new_count;
4898 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4899 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4901 cs->count = cs->count.apply_scale (new_count, orig_count);
4902 processed_edges.add (cs);
4904 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4905 cs->count = cs->count.apply_scale (new_count, orig_count);
4907 i++;
4910 /* There are still going to be edges to ORIG_NODE that have one or more
4911 clones coming from another node clone in SELF_GEN_CLONES and which we
4912 scaled by the same amount, which means that the total incoming sum of
4913 counts to ORIG_NODE will be too high, scale such edges back. */
4914 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4916 if (cs->callee->ultimate_alias_target () == orig_node)
4918 unsigned den = 0;
4919 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4920 if (e->callee->ultimate_alias_target () == orig_node
4921 && processed_edges.contains (e))
4922 den++;
4923 if (den > 0)
4924 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4925 if (e->callee->ultimate_alias_target () == orig_node
4926 && processed_edges.contains (e))
4927 e->count /= den;
4931 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4932 along self-recursive edges are likely to have fairly low count and so
4933 edges from them to nodes in the self_gen_clones do not correspond to the
4934 artificially distributed count of the nodes, the total sum of incoming
4935 edges to some clones might be too low. Detect this situation and correct
4936 it. */
4937 for (cgraph_node *n : self_gen_clones)
4939 if (!(n->count.ipa () > profile_count::zero ()))
4940 continue;
4942 desc_incoming_count_struct desc;
4943 desc.orig = orig_node;
4944 desc.processed_edges = &processed_edges;
4945 desc.count = profile_count::zero ();
4946 desc.unproc_orig_rec_edges = 0;
4947 analyze_clone_icoming_counts (n, &desc);
4949 if (n->count.differs_from_p (desc.count))
4951 if (n->count > desc.count
4952 && desc.unproc_orig_rec_edges > 0)
4954 desc.count = n->count - desc.count;
4955 desc.count = desc.count /= desc.unproc_orig_rec_edges;
4956 adjust_clone_incoming_counts (n, &desc);
4958 else if (dump_file)
4959 fprintf (dump_file,
4960 " Unable to fix up incoming counts for %s.\n",
4961 n->dump_name ());
4965 if (dump_file)
4966 for (cgraph_node *n : self_gen_clones)
4967 dump_profile_updates (n, n != orig_node);
4968 return;
4971 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4972 their profile information to reflect this. This function should not be used
4973 for clones generated for arithmetic pass-through jump functions on a
4974 self-recursive call graph edge, that situation is handled by
4975 update_counts_for_self_gen_clones. */
4977 static void
4978 update_profiling_info (struct cgraph_node *orig_node,
4979 struct cgraph_node *new_node)
4981 struct caller_statistics stats;
4982 profile_count new_sum;
4983 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4985 if (!(orig_node_count > profile_count::zero ()))
4986 return;
4988 if (dump_file)
4990 fprintf (dump_file, " Updating profile from original count: ");
4991 orig_node_count.dump (dump_file);
4992 fprintf (dump_file, "\n");
4995 init_caller_stats (&stats, new_node);
4996 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4997 false);
4998 new_sum = stats.count_sum;
5000 bool orig_edges_processed = false;
5001 if (new_sum > orig_node_count)
5003 /* TODO: Profile has alreay gone astray, keep what we have but lower it
5004 to global0 category. */
5005 remainder = orig_node->count.global0 ();
5007 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5008 cs->count = cs->count.global0 ();
5009 for (cgraph_edge *cs = orig_node->indirect_calls;
5011 cs = cs->next_callee)
5012 cs->count = cs->count.global0 ();
5013 orig_edges_processed = true;
5015 else if (stats.rec_count_sum.nonzero_p ())
5017 int new_nonrec_calls = stats.n_nonrec_calls;
5018 /* There are self-recursive edges which are likely to bring in the
5019 majority of calls but which we must divide in between the original and
5020 new node. */
5021 init_caller_stats (&stats, orig_node);
5022 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
5023 &stats, false);
5024 int orig_nonrec_calls = stats.n_nonrec_calls;
5025 profile_count orig_nonrec_call_count = stats.count_sum;
5027 if (orig_node->local)
5029 if (!orig_nonrec_call_count.nonzero_p ())
5031 if (dump_file)
5032 fprintf (dump_file, " The original is local and the only "
5033 "incoming edges from non-dead callers with nonzero "
5034 "counts are self-recursive, assuming it is cold.\n");
5035 /* The NEW_NODE count and counts of all its outgoing edges
5036 are still unmodified copies of ORIG_NODE's. Just clear
5037 the latter and bail out. */
5038 profile_count zero;
5039 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5040 zero = profile_count::zero ().guessed_local ();
5041 else
5042 zero = profile_count::adjusted_zero ();
5043 orig_node->count = zero;
5044 for (cgraph_edge *cs = orig_node->callees;
5046 cs = cs->next_callee)
5047 cs->count = zero;
5048 for (cgraph_edge *cs = orig_node->indirect_calls;
5050 cs = cs->next_callee)
5051 cs->count = zero;
5052 return;
5055 else
5057 /* Let's behave as if there was another caller that accounts for all
5058 the calls that were either indirect or from other compilation
5059 units. */
5060 orig_nonrec_calls++;
5061 profile_count pretend_caller_count
5062 = (orig_node_count - new_sum - orig_nonrec_call_count
5063 - stats.rec_count_sum);
5064 orig_nonrec_call_count += pretend_caller_count;
5067 /* Divide all "unexplained" counts roughly proportionally to sums of
5068 counts of non-recursive calls.
5070 We put rather arbitrary limits on how many counts we claim because the
5071 number of non-self-recursive incoming count is only a rough guideline
5072 and there are cases (such as mcf) where using it blindly just takes
5073 too many. And if lattices are considered in the opposite order we
5074 could also take too few. */
5075 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5077 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5078 profile_count new_part
5079 = MAX(MIN (unexp.apply_scale (new_sum,
5080 new_sum + orig_nonrec_call_count),
5081 unexp.apply_scale (limit_den - 1, limit_den)),
5082 unexp.apply_scale (new_nonrec_calls, limit_den));
5083 if (dump_file)
5085 fprintf (dump_file, " Claiming ");
5086 new_part.dump (dump_file);
5087 fprintf (dump_file, " of unexplained ");
5088 unexp.dump (dump_file);
5089 fprintf (dump_file, " counts because of self-recursive "
5090 "calls\n");
5092 new_sum += new_part;
5093 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5094 orig_node);
5096 else
5097 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5098 orig_node);
5100 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5101 new_node->count = new_sum;
5102 orig_node->count = remainder;
5104 profile_count orig_new_node_count = orig_node_count;
5105 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5106 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5107 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5108 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5109 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5111 if (!orig_edges_processed)
5113 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5114 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5115 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5116 for (cgraph_edge *cs = orig_node->indirect_calls;
5118 cs = cs->next_callee)
5119 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5122 if (dump_file)
5124 dump_profile_updates (new_node, true);
5125 dump_profile_updates (orig_node, false);
5129 /* Update the respective profile of specialized NEW_NODE and the original
5130 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5131 have been redirected to the specialized version. */
5133 static void
5134 update_specialized_profile (struct cgraph_node *new_node,
5135 struct cgraph_node *orig_node,
5136 profile_count redirected_sum)
5138 struct cgraph_edge *cs;
5139 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
5141 if (dump_file)
5143 fprintf (dump_file, " the sum of counts of redirected edges is ");
5144 redirected_sum.dump (dump_file);
5145 fprintf (dump_file, "\n old ipa count of the original node is ");
5146 orig_node_count.dump (dump_file);
5147 fprintf (dump_file, "\n");
5149 if (!(orig_node_count > profile_count::zero ()))
5150 return;
5152 new_node_count = new_node->count;
5153 new_node->count += redirected_sum;
5154 orig_node->count
5155 = lenient_count_portion_handling (orig_node->count - redirected_sum,
5156 orig_node);
5158 for (cs = new_node->callees; cs; cs = cs->next_callee)
5159 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5161 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5163 profile_count dec = cs->count.apply_scale (redirected_sum,
5164 orig_node_count);
5165 cs->count -= dec;
5168 if (dump_file)
5170 dump_profile_updates (new_node, true);
5171 dump_profile_updates (orig_node, false);
5175 static void adjust_references_in_caller (cgraph_edge *cs,
5176 symtab_node *symbol, int index);
5178 /* Simple structure to pass a symbol and index (with same meaning as parameters
5179 of adjust_references_in_caller) through a void* parameter of a
5180 call_for_symbol_thunks_and_aliases callback. */
5181 struct symbol_and_index_together
5183 symtab_node *symbol;
5184 int index;
5187 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5188 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5189 static bool
5190 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5192 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5193 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5194 if (!cs->caller->thunk)
5195 adjust_references_in_caller (cs, pack->symbol, pack->index);
5196 return false;
5199 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5200 variable which is only dereferenced and which is represented by SYMBOL. See
5201 if we can remove ADDR reference in callers assosiated witht the call. */
5203 static void
5204 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5206 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5207 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5208 if (jfunc->type == IPA_JF_CONST)
5210 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5211 cs->lto_stmt_uid,
5212 IPA_REF_ADDR);
5213 if (!to_del)
5214 return;
5215 to_del->remove_reference ();
5216 ipa_zap_jf_refdesc (jfunc);
5217 if (dump_file)
5218 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5219 cs->caller->dump_name (), symbol->dump_name ());
5220 return;
5223 if (jfunc->type != IPA_JF_PASS_THROUGH
5224 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
5225 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
5226 return;
5228 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5229 cgraph_node *caller = cs->caller;
5230 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5231 /* TODO: This consistency check may be too big and not really
5232 that useful. Consider removing it. */
5233 tree cst;
5234 if (caller_info->ipcp_orig_node)
5235 cst = caller_info->known_csts[fidx];
5236 else
5238 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5239 gcc_assert (lat->is_single_const ());
5240 cst = lat->values->value;
5242 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5243 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5244 == symbol));
5246 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5247 if (cuses == IPA_UNDESCRIBED_USE)
5248 return;
5249 gcc_assert (cuses > 0);
5250 cuses--;
5251 ipa_set_controlled_uses (caller_info, fidx, cuses);
5252 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5253 if (dump_file && (dump_flags & TDF_DETAILS))
5254 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5255 "to %i.\n", fidx, caller->dump_name (), cuses);
5256 if (cuses)
5257 return;
5259 if (caller_info->ipcp_orig_node)
5261 /* Cloning machinery has created a reference here, we need to either
5262 remove it or change it to a read one. */
5263 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5264 if (to_del)
5266 to_del->remove_reference ();
5267 if (dump_file)
5268 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5269 cs->caller->dump_name (), symbol->dump_name ());
5270 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5272 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5273 if (dump_file)
5274 fprintf (dump_file,
5275 " ...and replaced it with LOAD one.\n");
5280 symbol_and_index_together pack;
5281 pack.symbol = symbol;
5282 pack.index = fidx;
5283 if (caller->can_change_signature)
5284 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5285 &pack, true);
5289 /* Return true if we would like to remove a parameter from NODE when cloning it
5290 with KNOWN_CSTS scalar constants. */
5292 static bool
5293 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5295 auto_vec<bool, 16> surviving;
5296 bool filled_vec = false;
5297 ipa_node_params *info = ipa_node_params_sum->get (node);
5298 int i, count = ipa_get_param_count (info);
5300 for (i = 0; i < count; i++)
5302 if (!known_csts[i] && ipa_is_param_used (info, i))
5303 continue;
5305 if (!filled_vec)
5307 clone_info *info = clone_info::get (node);
5308 if (!info || !info->param_adjustments)
5309 return true;
5310 info->param_adjustments->get_surviving_params (&surviving);
5311 filled_vec = true;
5313 if (surviving.length() < (unsigned) i && surviving[i])
5314 return true;
5316 return false;
5319 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5320 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5321 redirect all edges in CALLERS to it. */
5323 static struct cgraph_node *
5324 create_specialized_node (struct cgraph_node *node,
5325 vec<tree> known_csts,
5326 vec<ipa_polymorphic_call_context> known_contexts,
5327 vec<ipa_argagg_value, va_gc> *aggvals,
5328 vec<cgraph_edge *> &callers)
5330 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5331 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5332 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5333 struct cgraph_node *new_node;
5334 int i, count = ipa_get_param_count (info);
5335 clone_info *cinfo = clone_info::get (node);
5336 ipa_param_adjustments *old_adjustments = cinfo
5337 ? cinfo->param_adjustments : NULL;
5338 ipa_param_adjustments *new_adjustments;
5339 gcc_assert (!info->ipcp_orig_node);
5340 gcc_assert (node->can_change_signature
5341 || !old_adjustments);
5343 if (old_adjustments)
5345 /* At the moment all IPA optimizations should use the number of
5346 parameters of the prevailing decl as the m_always_copy_start.
5347 Handling any other value would complicate the code below, so for the
5348 time bing let's only assert it is so. */
5349 gcc_assert (old_adjustments->m_always_copy_start == count
5350 || old_adjustments->m_always_copy_start < 0);
5351 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5352 for (i = 0; i < old_adj_count; i++)
5354 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5355 if (!node->can_change_signature
5356 || old_adj->op != IPA_PARAM_OP_COPY
5357 || (!known_csts[old_adj->base_index]
5358 && ipa_is_param_used (info, old_adj->base_index)))
5360 ipa_adjusted_param new_adj = *old_adj;
5362 new_adj.prev_clone_adjustment = true;
5363 new_adj.prev_clone_index = i;
5364 vec_safe_push (new_params, new_adj);
5367 bool skip_return = old_adjustments->m_skip_return;
5368 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5369 ipa_param_adjustments (new_params, count,
5370 skip_return));
5372 else if (node->can_change_signature
5373 && want_remove_some_param_p (node, known_csts))
5375 ipa_adjusted_param adj;
5376 memset (&adj, 0, sizeof (adj));
5377 adj.op = IPA_PARAM_OP_COPY;
5378 for (i = 0; i < count; i++)
5379 if (!known_csts[i] && ipa_is_param_used (info, i))
5381 adj.base_index = i;
5382 adj.prev_clone_index = i;
5383 vec_safe_push (new_params, adj);
5385 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5386 ipa_param_adjustments (new_params, count, false));
5388 else
5389 new_adjustments = NULL;
5391 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5392 for (i = callers.length () - 1; i >= 0; i--)
5394 cgraph_edge *cs = callers[i];
5395 if (cs->caller == node)
5397 self_recursive_calls.safe_push (cs);
5398 callers.unordered_remove (i);
5401 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5402 for (i = 0; i < count; i++)
5404 tree t = known_csts[i];
5405 if (!t)
5406 continue;
5408 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5410 bool load_ref = false;
5411 symtab_node *ref_symbol;
5412 if (TREE_CODE (t) == ADDR_EXPR)
5414 tree base = get_base_address (TREE_OPERAND (t, 0));
5415 if (TREE_CODE (base) == VAR_DECL
5416 && ipa_get_controlled_uses (info, i) == 0
5417 && ipa_get_param_load_dereferenced (info, i)
5418 && (ref_symbol = symtab_node::get (base)))
5420 load_ref = true;
5421 if (node->can_change_signature)
5422 for (cgraph_edge *caller : callers)
5423 adjust_references_in_caller (caller, ref_symbol, i);
5427 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5428 if (replace_map)
5429 vec_safe_push (replace_trees, replace_map);
5432 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5433 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5434 node->decl)));
5435 new_node = node->create_virtual_clone (callers, replace_trees,
5436 new_adjustments, "constprop",
5437 suffix_counter);
5438 suffix_counter++;
5440 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5441 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5443 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5444 /* Cloned edges can disappear during cloning as speculation can be
5445 resolved, check that we have one and that it comes from the last
5446 cloning. */
5447 if (cs && cs->caller == new_node)
5448 cs->redirect_callee_duplicating_thunks (new_node);
5449 /* Any future code that would make more than one clone of an outgoing
5450 edge would confuse this mechanism, so let's check that does not
5451 happen. */
5452 gcc_checking_assert (!cs
5453 || !get_next_cgraph_edge_clone (cs)
5454 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5456 if (have_self_recursive_calls)
5457 new_node->expand_all_artificial_thunks ();
5459 ipa_set_node_agg_value_chain (new_node, aggvals);
5460 for (const ipa_argagg_value &av : aggvals)
5461 new_node->maybe_create_reference (av.value, NULL);
5463 if (dump_file && (dump_flags & TDF_DETAILS))
5465 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5466 if (known_contexts.exists ())
5468 for (i = 0; i < count; i++)
5469 if (!known_contexts[i].useless_p ())
5471 fprintf (dump_file, " known ctx %i is ", i);
5472 known_contexts[i].dump (dump_file);
5475 if (aggvals)
5477 fprintf (dump_file, " Aggregate replacements:");
5478 ipa_argagg_value_list avs (aggvals);
5479 avs.dump (dump_file);
5483 new_info = ipa_node_params_sum->get (new_node);
5484 new_info->ipcp_orig_node = node;
5485 new_node->ipcp_clone = true;
5486 new_info->known_csts = known_csts;
5487 new_info->known_contexts = known_contexts;
5489 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5490 aggvals);
5492 return new_node;
5495 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5496 pass-through function to itself when the cgraph_node involved is not an
5497 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5498 no-operation pass-through. */
5500 static bool
5501 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5502 bool simple = true)
5504 enum availability availability;
5505 if (cs->caller == cs->callee->function_symbol (&availability)
5506 && availability > AVAIL_INTERPOSABLE
5507 && jfunc->type == IPA_JF_PASS_THROUGH
5508 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5509 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5510 && ipa_node_params_sum->get (cs->caller)
5511 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5512 return true;
5513 return false;
5516 /* Return true if JFUNC, which describes a part of an aggregate represented or
5517 pointed to by the i-th parameter of call CS, is a pass-through function to
5518 itself when the cgraph_node involved is not an IPA-CP clone.. When
5519 SIMPLE is true, further check if JFUNC is a simple no-operation
5520 pass-through. */
5522 static bool
5523 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5524 const ipa_agg_jf_item *jfunc,
5525 int i, bool simple = true)
5527 enum availability availability;
5528 if (cs->caller == cs->callee->function_symbol (&availability)
5529 && availability > AVAIL_INTERPOSABLE
5530 && jfunc->jftype == IPA_JF_LOAD_AGG
5531 && jfunc->offset == jfunc->value.load_agg.offset
5532 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5533 && jfunc->value.pass_through.formal_id == i
5534 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5535 && ipa_node_params_sum->get (cs->caller)
5536 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5537 return true;
5538 return false;
5541 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5542 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5544 static void
5545 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5546 vec<tree> &known_csts,
5547 const vec<cgraph_edge *> &callers)
5549 ipa_node_params *info = ipa_node_params_sum->get (node);
5550 int i, count = ipa_get_param_count (info);
5552 for (i = 0; i < count; i++)
5554 struct cgraph_edge *cs;
5555 tree newval = NULL_TREE;
5556 int j;
5557 bool first = true;
5558 tree type = ipa_get_type (info, i);
5560 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5561 continue;
5563 FOR_EACH_VEC_ELT (callers, j, cs)
5565 struct ipa_jump_func *jump_func;
5566 tree t;
5568 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5569 if (!args
5570 || i >= ipa_get_cs_argument_count (args)
5571 || (i == 0
5572 && call_passes_through_thunk (cs)))
5574 newval = NULL_TREE;
5575 break;
5577 jump_func = ipa_get_ith_jump_func (args, i);
5579 /* Besides simple pass-through jump function, arithmetic jump
5580 function could also introduce argument-direct-pass-through for
5581 self-feeding recursive call. For example,
5583 fn (int i)
5585 fn (i & 1);
5588 Given that i is 0, recursive propagation via (i & 1) also gets
5589 0. */
5590 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5592 gcc_assert (newval);
5593 t = ipa_get_jf_arith_result (
5594 ipa_get_jf_pass_through_operation (jump_func),
5595 newval,
5596 ipa_get_jf_pass_through_operand (jump_func),
5597 type);
5599 else
5600 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5601 jump_func, type);
5602 if (!t
5603 || (newval
5604 && !values_equal_for_ipcp_p (t, newval))
5605 || (!first && !newval))
5607 newval = NULL_TREE;
5608 break;
5610 else
5611 newval = t;
5612 first = false;
5615 if (newval)
5617 if (dump_file && (dump_flags & TDF_DETAILS))
5619 fprintf (dump_file, " adding an extra known scalar value ");
5620 print_ipcp_constant_value (dump_file, newval);
5621 fprintf (dump_file, " for ");
5622 ipa_dump_param (dump_file, info, i);
5623 fprintf (dump_file, "\n");
5626 known_csts[i] = newval;
5631 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5632 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5633 CALLERS. */
5635 static void
5636 find_more_contexts_for_caller_subset (cgraph_node *node,
5637 vec<ipa_polymorphic_call_context>
5638 *known_contexts,
5639 const vec<cgraph_edge *> &callers)
5641 ipa_node_params *info = ipa_node_params_sum->get (node);
5642 int i, count = ipa_get_param_count (info);
5644 for (i = 0; i < count; i++)
5646 cgraph_edge *cs;
5648 if (ipa_get_poly_ctx_lat (info, i)->bottom
5649 || (known_contexts->exists ()
5650 && !(*known_contexts)[i].useless_p ()))
5651 continue;
5653 ipa_polymorphic_call_context newval;
5654 bool first = true;
5655 int j;
5657 FOR_EACH_VEC_ELT (callers, j, cs)
5659 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5660 if (!args
5661 || i >= ipa_get_cs_argument_count (args))
5662 return;
5663 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5664 ipa_polymorphic_call_context ctx;
5665 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5666 cs, i, jfunc);
5667 if (first)
5669 newval = ctx;
5670 first = false;
5672 else
5673 newval.meet_with (ctx);
5674 if (newval.useless_p ())
5675 break;
5678 if (!newval.useless_p ())
5680 if (dump_file && (dump_flags & TDF_DETAILS))
5682 fprintf (dump_file, " adding an extra known polymorphic "
5683 "context ");
5684 print_ipcp_constant_value (dump_file, newval);
5685 fprintf (dump_file, " for ");
5686 ipa_dump_param (dump_file, info, i);
5687 fprintf (dump_file, "\n");
5690 if (!known_contexts->exists ())
5691 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5692 true);
5693 (*known_contexts)[i] = newval;
5699 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5700 RES. If INTERIM is non-NULL, it contains the current interim state of
5701 collected aggregate values which can be used to compute values passed over
5702 self-recursive edges.
5704 This basically one iteration of push_agg_values_from_edge over one
5705 parameter, which allows for simpler early returns. */
5707 static void
5708 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5709 vec<ipa_argagg_value> *res,
5710 const ipa_argagg_value_list *interim)
5712 bool agg_values_from_caller = false;
5713 bool agg_jf_preserved = false;
5714 unsigned unit_delta = UINT_MAX;
5715 int src_idx = -1;
5716 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5717 index);
5719 if (jfunc->type == IPA_JF_PASS_THROUGH
5720 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5722 agg_values_from_caller = true;
5723 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5724 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5725 unit_delta = 0;
5727 else if (jfunc->type == IPA_JF_ANCESTOR
5728 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5730 agg_values_from_caller = true;
5731 agg_jf_preserved = true;
5732 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5733 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5736 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5737 if (agg_values_from_caller)
5739 if (caller_info->ipcp_orig_node)
5741 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5742 ipcp_transformation *ts
5743 = ipcp_get_transformation_summary (cs->caller);
5744 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5745 ipcp_param_lattices *orig_plats
5746 = ipa_get_parm_lattices (orig_info, src_idx);
5747 if (ts
5748 && orig_plats->aggs
5749 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5751 ipa_argagg_value_list src (ts);
5752 src.push_adjusted_values (src_idx, index, unit_delta, res);
5753 return;
5756 else
5758 ipcp_param_lattices *src_plats
5759 = ipa_get_parm_lattices (caller_info, src_idx);
5760 if (src_plats->aggs
5761 && !src_plats->aggs_bottom
5762 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5764 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5766 interim->push_adjusted_values (src_idx, index, unit_delta,
5767 res);
5768 return;
5770 if (!src_plats->aggs_contain_variable)
5772 push_agg_values_from_plats (src_plats, index, unit_delta,
5773 res);
5774 return;
5780 if (!jfunc->agg.items)
5781 return;
5782 bool first = true;
5783 unsigned prev_unit_offset = 0;
5784 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5786 tree value, srcvalue;
5787 /* Besides simple pass-through aggregate jump function, arithmetic
5788 aggregate jump function could also bring same aggregate value as
5789 parameter passed-in for self-feeding recursive call. For example,
5791 fn (int *i)
5793 int j = *i & 1;
5794 fn (&j);
5797 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5798 if (interim
5799 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5800 && (srcvalue = interim->get_value(index,
5801 agg_jf.offset / BITS_PER_UNIT)))
5802 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5803 srcvalue,
5804 agg_jf.value.pass_through.operand,
5805 agg_jf.type);
5806 else
5807 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5808 &agg_jf);
5809 if (value)
5811 struct ipa_argagg_value iav;
5812 iav.value = value;
5813 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5814 iav.index = index;
5815 iav.by_ref = jfunc->agg.by_ref;
5817 gcc_assert (first
5818 || iav.unit_offset > prev_unit_offset);
5819 prev_unit_offset = iav.unit_offset;
5820 first = false;
5822 res->safe_push (iav);
5825 return;
5828 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5829 description of ultimate callee of CS or the one it was cloned from (the
5830 summary where lattices are). If INTERIM is non-NULL, it contains the
5831 current interim state of collected aggregate values which can be used to
5832 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5833 is true) and to skip values which clearly will not be part of intersection
5834 with INTERIM. */
5836 static void
5837 push_agg_values_from_edge (struct cgraph_edge *cs,
5838 ipa_node_params *dest_info,
5839 vec<ipa_argagg_value> *res,
5840 const ipa_argagg_value_list *interim,
5841 bool optimize_self_recursion)
5843 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5844 if (!args)
5845 return;
5847 int count = MIN (ipa_get_param_count (dest_info),
5848 ipa_get_cs_argument_count (args));
5850 unsigned interim_index = 0;
5851 for (int index = 0; index < count; index++)
5853 if (interim)
5855 while (interim_index < interim->m_elts.size ()
5856 && interim->m_elts[interim_index].value
5857 && interim->m_elts[interim_index].index < index)
5858 interim_index++;
5859 if (interim_index >= interim->m_elts.size ()
5860 || interim->m_elts[interim_index].index > index)
5861 continue;
5864 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5865 if (!ipa_is_param_used (dest_info, index)
5866 || plats->aggs_bottom)
5867 continue;
5868 push_agg_values_for_index_from_edge (cs, index, res,
5869 optimize_self_recursion ? interim
5870 : NULL);
5875 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5876 from all of them. Return nullptr if there are none. */
5878 static struct vec<ipa_argagg_value, va_gc> *
5879 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5880 const vec<cgraph_edge *> &callers)
5882 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5883 if (dest_info->ipcp_orig_node)
5884 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5886 /* gather_edges_for_value puts a non-recursive call into the first element of
5887 callers if it can. */
5888 auto_vec<ipa_argagg_value, 32> interim;
5889 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5891 unsigned valid_entries = interim.length ();
5892 if (!valid_entries)
5893 return nullptr;
5895 unsigned caller_count = callers.length();
5896 for (unsigned i = 1; i < caller_count; i++)
5898 auto_vec<ipa_argagg_value, 32> last;
5899 ipa_argagg_value_list avs (&interim);
5900 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5902 valid_entries = intersect_argaggs_with (interim, last);
5903 if (!valid_entries)
5904 return nullptr;
5907 vec<ipa_argagg_value, va_gc> *res = NULL;
5908 vec_safe_reserve_exact (res, valid_entries);
5909 for (const ipa_argagg_value &av : interim)
5910 if (av.value)
5911 res->quick_push(av);
5912 gcc_checking_assert (res->length () == valid_entries);
5913 return res;
5916 /* Determine whether CS also brings all scalar values that the NODE is
5917 specialized for. */
5919 static bool
5920 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5921 struct cgraph_node *node)
5923 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5924 int count = ipa_get_param_count (dest_info);
5925 class ipa_node_params *caller_info;
5926 class ipa_edge_args *args;
5927 int i;
5929 caller_info = ipa_node_params_sum->get (cs->caller);
5930 args = ipa_edge_args_sum->get (cs);
5931 for (i = 0; i < count; i++)
5933 struct ipa_jump_func *jump_func;
5934 tree val, t;
5936 val = dest_info->known_csts[i];
5937 if (!val)
5938 continue;
5940 if (i >= ipa_get_cs_argument_count (args))
5941 return false;
5942 jump_func = ipa_get_ith_jump_func (args, i);
5943 t = ipa_value_from_jfunc (caller_info, jump_func,
5944 ipa_get_type (dest_info, i));
5945 if (!t || !values_equal_for_ipcp_p (val, t))
5946 return false;
5948 return true;
5951 /* Determine whether CS also brings all aggregate values that NODE is
5952 specialized for. */
5954 static bool
5955 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5956 struct cgraph_node *node)
5958 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5959 if (!ts || vec_safe_is_empty (ts->m_agg_values))
5960 return true;
5962 const ipa_argagg_value_list existing (ts->m_agg_values);
5963 auto_vec<ipa_argagg_value, 32> edge_values;
5964 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5965 gcc_checking_assert (dest_info->ipcp_orig_node);
5966 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5967 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5968 const ipa_argagg_value_list avl (&edge_values);
5969 return avl.superset_of_p (existing);
5972 /* Given an original NODE and a VAL for which we have already created a
5973 specialized clone, look whether there are incoming edges that still lead
5974 into the old node but now also bring the requested value and also conform to
5975 all other criteria such that they can be redirected the special node.
5976 This function can therefore redirect the final edge in a SCC. */
5978 template <typename valtype>
5979 static void
5980 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5982 ipcp_value_source<valtype> *src;
5983 profile_count redirected_sum = profile_count::zero ();
5985 for (src = val->sources; src; src = src->next)
5987 struct cgraph_edge *cs = src->cs;
5988 while (cs)
5990 if (cgraph_edge_brings_value_p (cs, src, node, val)
5991 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5992 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5994 if (dump_file)
5995 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5996 cs->caller->dump_name (),
5997 val->spec_node->dump_name ());
5999 cs->redirect_callee_duplicating_thunks (val->spec_node);
6000 val->spec_node->expand_all_artificial_thunks ();
6001 if (cs->count.ipa ().initialized_p ())
6002 redirected_sum = redirected_sum + cs->count.ipa ();
6004 cs = get_next_cgraph_edge_clone (cs);
6008 if (redirected_sum.nonzero_p ())
6009 update_specialized_profile (val->spec_node, node, redirected_sum);
6012 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
6014 static bool
6015 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
6017 ipa_polymorphic_call_context *ctx;
6018 int i;
6020 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
6021 if (!ctx->useless_p ())
6022 return true;
6023 return false;
6026 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
6028 static vec<ipa_polymorphic_call_context>
6029 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
6031 if (known_contexts_useful_p (known_contexts))
6032 return known_contexts.copy ();
6033 else
6034 return vNULL;
6037 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6038 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
6039 copy too. */
6041 static void
6042 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6043 vec<tree> *known_csts,
6044 vec<ipa_polymorphic_call_context> *known_contexts,
6045 ipcp_value<tree> *val, int index)
6047 *known_csts = avals->m_known_vals.copy ();
6048 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6049 (*known_csts)[index] = val->value;
6052 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6053 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6054 INDEX. */
6056 static void
6057 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6058 vec<tree> *known_csts,
6059 vec<ipa_polymorphic_call_context> *known_contexts,
6060 ipcp_value<ipa_polymorphic_call_context> *val,
6061 int index)
6063 *known_csts = avals->m_known_vals.copy ();
6064 *known_contexts = avals->m_known_contexts.copy ();
6065 (*known_contexts)[index] = val->value;
6068 /* Return true if OFFSET indicates this was not an aggregate value or there is
6069 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6070 AGGVALS list. */
6072 DEBUG_FUNCTION bool
6073 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6074 int index, HOST_WIDE_INT offset, tree value)
6076 if (offset == -1)
6077 return true;
6079 const ipa_argagg_value_list avl (aggvals);
6080 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6081 return v && values_equal_for_ipcp_p (v, value);
6084 /* Return true if offset is minus one because source of a polymorphic context
6085 cannot be an aggregate value. */
6087 DEBUG_FUNCTION bool
6088 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6089 int , HOST_WIDE_INT offset,
6090 ipa_polymorphic_call_context)
6092 return offset == -1;
6095 /* Decide whether to create a special version of NODE for value VAL of
6096 parameter at the given INDEX. If OFFSET is -1, the value is for the
6097 parameter itself, otherwise it is stored at the given OFFSET of the
6098 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6099 is a vector which contains clones created for self-recursive calls with an
6100 arithmetic pass-through jump function. */
6102 template <typename valtype>
6103 static bool
6104 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6105 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6106 vec<cgraph_node *> *self_gen_clones)
6108 int caller_count;
6109 sreal freq_sum;
6110 profile_count count_sum, rec_count_sum;
6111 vec<cgraph_edge *> callers;
6113 if (val->spec_node)
6115 perhaps_add_new_callers (node, val);
6116 return false;
6118 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6120 if (dump_file && (dump_flags & TDF_DETAILS))
6121 fprintf (dump_file, " Ignoring candidate value because "
6122 "maximum unit size would be reached with %li.\n",
6123 val->local_size_cost + overall_size);
6124 return false;
6126 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6127 &rec_count_sum, &count_sum))
6128 return false;
6130 if (!dbg_cnt (ipa_cp_values))
6131 return false;
6133 if (val->self_recursion_generated_p ())
6135 /* The edge counts in this case might not have been adjusted yet.
6136 Nevertleless, even if they were it would be only a guesswork which we
6137 can do now. The recursive part of the counts can be derived from the
6138 count of the original node anyway. */
6139 if (node->count.ipa ().nonzero_p ())
6141 unsigned dem = self_gen_clones->length () + 1;
6142 rec_count_sum = node->count.ipa () / dem;
6144 else
6145 rec_count_sum = profile_count::zero ();
6148 /* get_info_about_necessary_edges only sums up ipa counts. */
6149 count_sum += rec_count_sum;
6151 if (dump_file && (dump_flags & TDF_DETAILS))
6153 fprintf (dump_file, " - considering value ");
6154 print_ipcp_constant_value (dump_file, val->value);
6155 fprintf (dump_file, " for ");
6156 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6157 if (offset != -1)
6158 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6159 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6162 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6163 freq_sum, count_sum,
6164 val->local_size_cost)
6165 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6166 freq_sum, count_sum, val->prop_size_cost))
6167 return false;
6169 if (dump_file)
6170 fprintf (dump_file, " Creating a specialized node of %s.\n",
6171 node->dump_name ());
6173 vec<tree> known_csts;
6174 vec<ipa_polymorphic_call_context> known_contexts;
6176 callers = gather_edges_for_value (val, node, caller_count);
6177 if (offset == -1)
6178 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6179 else
6181 known_csts = avals->m_known_vals.copy ();
6182 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6184 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6185 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6186 vec<ipa_argagg_value, va_gc> *aggvals
6187 = find_aggregate_values_for_callers_subset (node, callers);
6188 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6189 offset, val->value));
6190 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6191 aggvals, callers);
6193 if (val->self_recursion_generated_p ())
6194 self_gen_clones->safe_push (val->spec_node);
6195 else
6196 update_profiling_info (node, val->spec_node);
6198 callers.release ();
6199 overall_size += val->local_size_cost;
6200 if (dump_file && (dump_flags & TDF_DETAILS))
6201 fprintf (dump_file, " overall size reached %li\n",
6202 overall_size);
6204 /* TODO: If for some lattice there is only one other known value
6205 left, make a special node for it too. */
6207 return true;
6210 /* Like irange::contains_p(), but convert VAL to the range of R if
6211 necessary. */
6213 static inline bool
6214 ipa_range_contains_p (const vrange &r, tree val)
6216 if (r.undefined_p ())
6217 return false;
6219 tree type = r.type ();
6220 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6221 return false;
6223 val = fold_convert (type, val);
6224 return r.contains_p (val);
6227 /* Decide whether and what specialized clones of NODE should be created. */
6229 static bool
6230 decide_whether_version_node (struct cgraph_node *node)
6232 ipa_node_params *info = ipa_node_params_sum->get (node);
6233 int i, count = ipa_get_param_count (info);
6234 bool ret = false;
6236 if (count == 0)
6237 return false;
6239 if (dump_file && (dump_flags & TDF_DETAILS))
6240 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6241 node->dump_name ());
6243 auto_vec <cgraph_node *, 9> self_gen_clones;
6244 ipa_auto_call_arg_values avals;
6245 gather_context_independent_values (info, &avals, false, NULL);
6247 for (i = 0; i < count;i++)
6249 if (!ipa_is_param_used (info, i))
6250 continue;
6252 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6253 ipcp_lattice<tree> *lat = &plats->itself;
6254 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6256 if (!lat->bottom
6257 && !avals.m_known_vals[i])
6259 ipcp_value<tree> *val;
6260 for (val = lat->values; val; val = val->next)
6262 /* If some values generated for self-recursive calls with
6263 arithmetic jump functions fall outside of the known
6264 value_range for the parameter, we can skip them. VR interface
6265 supports this only for integers now. */
6266 if (TREE_CODE (val->value) == INTEGER_CST
6267 && !plats->m_value_range.bottom_p ()
6268 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6269 val->value))
6271 /* This can happen also if a constant present in the source
6272 code falls outside of the range of parameter's type, so we
6273 cannot assert. */
6274 if (dump_file && (dump_flags & TDF_DETAILS))
6276 fprintf (dump_file, " - skipping%s value ",
6277 val->self_recursion_generated_p ()
6278 ? " self_recursion_generated" : "");
6279 print_ipcp_constant_value (dump_file, val->value);
6280 fprintf (dump_file, " because it is outside known "
6281 "value range.\n");
6283 continue;
6285 ret |= decide_about_value (node, i, -1, val, &avals,
6286 &self_gen_clones);
6290 if (!plats->aggs_bottom)
6292 struct ipcp_agg_lattice *aglat;
6293 ipcp_value<tree> *val;
6294 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6295 if (!aglat->bottom && aglat->values
6296 /* If the following is false, the one value has been considered
6297 for cloning for all contexts. */
6298 && (plats->aggs_contain_variable
6299 || !aglat->is_single_const ()))
6300 for (val = aglat->values; val; val = val->next)
6301 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6302 &self_gen_clones);
6305 if (!ctxlat->bottom
6306 && avals.m_known_contexts[i].useless_p ())
6308 ipcp_value<ipa_polymorphic_call_context> *val;
6309 for (val = ctxlat->values; val; val = val->next)
6310 ret |= decide_about_value (node, i, -1, val, &avals,
6311 &self_gen_clones);
6315 if (!self_gen_clones.is_empty ())
6317 self_gen_clones.safe_push (node);
6318 update_counts_for_self_gen_clones (node, self_gen_clones);
6321 if (info->do_clone_for_all_contexts)
6323 if (!dbg_cnt (ipa_cp_values))
6325 info->do_clone_for_all_contexts = false;
6326 return ret;
6329 struct cgraph_node *clone;
6330 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6332 for (int i = callers.length () - 1; i >= 0; i--)
6334 cgraph_edge *cs = callers[i];
6335 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6337 if (caller_info && caller_info->node_dead)
6338 callers.unordered_remove (i);
6341 if (!adjust_callers_for_value_intersection (callers, node))
6343 /* If node is not called by anyone, or all its caller edges are
6344 self-recursive, the node is not really in use, no need to do
6345 cloning. */
6346 info->do_clone_for_all_contexts = false;
6347 return ret;
6350 if (dump_file)
6351 fprintf (dump_file, " - Creating a specialized node of %s "
6352 "for all known contexts.\n", node->dump_name ());
6354 vec<tree> known_csts = avals.m_known_vals.copy ();
6355 vec<ipa_polymorphic_call_context> known_contexts
6356 = copy_useful_known_contexts (avals.m_known_contexts);
6357 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6358 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6359 vec<ipa_argagg_value, va_gc> *aggvals
6360 = find_aggregate_values_for_callers_subset (node, callers);
6362 if (!known_contexts_useful_p (known_contexts))
6364 known_contexts.release ();
6365 known_contexts = vNULL;
6367 clone = create_specialized_node (node, known_csts, known_contexts,
6368 aggvals, callers);
6369 info->do_clone_for_all_contexts = false;
6370 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6371 ret = true;
6374 return ret;
6377 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6379 static void
6380 spread_undeadness (struct cgraph_node *node)
6382 struct cgraph_edge *cs;
6384 for (cs = node->callees; cs; cs = cs->next_callee)
6385 if (ipa_edge_within_scc (cs))
6387 struct cgraph_node *callee;
6388 class ipa_node_params *info;
6390 callee = cs->callee->function_symbol (NULL);
6391 info = ipa_node_params_sum->get (callee);
6393 if (info && info->node_dead)
6395 info->node_dead = 0;
6396 spread_undeadness (callee);
6401 /* Return true if NODE has a caller from outside of its SCC that is not
6402 dead. Worker callback for cgraph_for_node_and_aliases. */
6404 static bool
6405 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6406 void *data ATTRIBUTE_UNUSED)
6408 struct cgraph_edge *cs;
6410 for (cs = node->callers; cs; cs = cs->next_caller)
6411 if (cs->caller->thunk
6412 && cs->caller->call_for_symbol_thunks_and_aliases
6413 (has_undead_caller_from_outside_scc_p, NULL, true))
6414 return true;
6415 else if (!ipa_edge_within_scc (cs))
6417 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6418 if (!caller_info /* Unoptimized caller are like dead ones. */
6419 || !caller_info->node_dead)
6420 return true;
6422 return false;
6426 /* Identify nodes within the same SCC as NODE which are no longer needed
6427 because of new clones and will be removed as unreachable. */
6429 static void
6430 identify_dead_nodes (struct cgraph_node *node)
6432 struct cgraph_node *v;
6433 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6434 if (v->local)
6436 ipa_node_params *info = ipa_node_params_sum->get (v);
6437 if (info
6438 && !v->call_for_symbol_thunks_and_aliases
6439 (has_undead_caller_from_outside_scc_p, NULL, true))
6440 info->node_dead = 1;
6443 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6445 ipa_node_params *info = ipa_node_params_sum->get (v);
6446 if (info && !info->node_dead)
6447 spread_undeadness (v);
6450 if (dump_file && (dump_flags & TDF_DETAILS))
6452 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6453 if (ipa_node_params_sum->get (v)
6454 && ipa_node_params_sum->get (v)->node_dead)
6455 fprintf (dump_file, " Marking node as dead: %s.\n",
6456 v->dump_name ());
6460 /* The decision stage. Iterate over the topological order of call graph nodes
6461 TOPO and make specialized clones if deemed beneficial. */
6463 static void
6464 ipcp_decision_stage (class ipa_topo_info *topo)
6466 int i;
6468 if (dump_file)
6469 fprintf (dump_file, "\nIPA decision stage:\n\n");
6471 for (i = topo->nnodes - 1; i >= 0; i--)
6473 struct cgraph_node *node = topo->order[i];
6474 bool change = false, iterate = true;
6476 while (iterate)
6478 struct cgraph_node *v;
6479 iterate = false;
6480 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6481 if (v->has_gimple_body_p ()
6482 && ipcp_versionable_function_p (v))
6483 iterate |= decide_whether_version_node (v);
6485 change |= iterate;
6487 if (change)
6488 identify_dead_nodes (node);
6492 /* Look up all the bits information that we have discovered and copy it over
6493 to the transformation summary. */
6495 static void
6496 ipcp_store_bits_results (void)
6498 cgraph_node *node;
6500 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6502 ipa_node_params *info = ipa_node_params_sum->get (node);
6503 bool dumped_sth = false;
6504 bool found_useful_result = false;
6506 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6508 if (dump_file)
6509 fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
6510 "; -fipa-bit-cp: disabled.\n",
6511 node->dump_name ());
6512 continue;
6515 if (info->ipcp_orig_node)
6516 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6517 if (!info->lattices)
6518 /* Newly expanded artificial thunks do not have lattices. */
6519 continue;
6521 unsigned count = ipa_get_param_count (info);
6522 for (unsigned i = 0; i < count; i++)
6524 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6525 if (plats->bits_lattice.constant_p ())
6527 found_useful_result = true;
6528 break;
6532 if (!found_useful_result)
6533 continue;
6535 ipcp_transformation_initialize ();
6536 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6537 vec_safe_reserve_exact (ts->bits, count);
6539 for (unsigned i = 0; i < count; i++)
6541 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6542 ipa_bits *jfbits;
6544 if (plats->bits_lattice.constant_p ())
6546 jfbits
6547 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6548 plats->bits_lattice.get_mask ());
6549 if (!dbg_cnt (ipa_cp_bits))
6550 jfbits = NULL;
6552 else
6553 jfbits = NULL;
6555 ts->bits->quick_push (jfbits);
6556 if (!dump_file || !jfbits)
6557 continue;
6558 if (!dumped_sth)
6560 fprintf (dump_file, "Propagated bits info for function %s:\n",
6561 node->dump_name ());
6562 dumped_sth = true;
6564 fprintf (dump_file, " param %i: value = ", i);
6565 print_hex (jfbits->value, dump_file);
6566 fprintf (dump_file, ", mask = ");
6567 print_hex (jfbits->mask, dump_file);
6568 fprintf (dump_file, "\n");
6573 /* Look up all VR information that we have discovered and copy it over
6574 to the transformation summary. */
6576 static void
6577 ipcp_store_vr_results (void)
6579 cgraph_node *node;
6581 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6583 ipa_node_params *info = ipa_node_params_sum->get (node);
6584 bool found_useful_result = false;
6586 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6588 if (dump_file)
6589 fprintf (dump_file, "Not considering %s for VR discovery "
6590 "and propagate; -fipa-ipa-vrp: disabled.\n",
6591 node->dump_name ());
6592 continue;
6595 if (info->ipcp_orig_node)
6596 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6597 if (!info->lattices)
6598 /* Newly expanded artificial thunks do not have lattices. */
6599 continue;
6601 unsigned count = ipa_get_param_count (info);
6602 for (unsigned i = 0; i < count; i++)
6604 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6605 if (!plats->m_value_range.bottom_p ()
6606 && !plats->m_value_range.top_p ())
6608 found_useful_result = true;
6609 break;
6612 if (!found_useful_result)
6613 continue;
6615 ipcp_transformation_initialize ();
6616 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6617 vec_safe_reserve_exact (ts->m_vr, count);
6619 for (unsigned i = 0; i < count; i++)
6621 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6623 if (!plats->m_value_range.bottom_p ()
6624 && !plats->m_value_range.top_p ()
6625 && dbg_cnt (ipa_cp_vr))
6627 ipa_vr vr (plats->m_value_range.m_vr);
6628 ts->m_vr->quick_push (vr);
6630 else
6632 ipa_vr vr;
6633 ts->m_vr->quick_push (vr);
6639 /* The IPCP driver. */
6641 static unsigned int
6642 ipcp_driver (void)
6644 class ipa_topo_info topo;
6646 if (edge_clone_summaries == NULL)
6647 edge_clone_summaries = new edge_clone_summary_t (symtab);
6649 ipa_check_create_node_params ();
6650 ipa_check_create_edge_args ();
6651 clone_num_suffixes = new hash_map<const char *, unsigned>;
6653 if (dump_file)
6655 fprintf (dump_file, "\nIPA structures before propagation:\n");
6656 if (dump_flags & TDF_DETAILS)
6657 ipa_print_all_params (dump_file);
6658 ipa_print_all_jump_functions (dump_file);
6661 /* Topological sort. */
6662 build_toporder_info (&topo);
6663 /* Do the interprocedural propagation. */
6664 ipcp_propagate_stage (&topo);
6665 /* Decide what constant propagation and cloning should be performed. */
6666 ipcp_decision_stage (&topo);
6667 /* Store results of bits propagation. */
6668 ipcp_store_bits_results ();
6669 /* Store results of value range propagation. */
6670 ipcp_store_vr_results ();
6672 /* Free all IPCP structures. */
6673 delete clone_num_suffixes;
6674 free_toporder_info (&topo);
6675 delete edge_clone_summaries;
6676 edge_clone_summaries = NULL;
6677 ipa_free_all_structures_after_ipa_cp ();
6678 if (dump_file)
6679 fprintf (dump_file, "\nIPA constant propagation end\n");
6680 return 0;
6683 /* Initialization and computation of IPCP data structures. This is the initial
6684 intraprocedural analysis of functions, which gathers information to be
6685 propagated later on. */
6687 static void
6688 ipcp_generate_summary (void)
6690 struct cgraph_node *node;
6692 if (dump_file)
6693 fprintf (dump_file, "\nIPA constant propagation start:\n");
6694 ipa_register_cgraph_hooks ();
6696 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6697 ipa_analyze_node (node);
6700 namespace {
6702 const pass_data pass_data_ipa_cp =
6704 IPA_PASS, /* type */
6705 "cp", /* name */
6706 OPTGROUP_NONE, /* optinfo_flags */
6707 TV_IPA_CONSTANT_PROP, /* tv_id */
6708 0, /* properties_required */
6709 0, /* properties_provided */
6710 0, /* properties_destroyed */
6711 0, /* todo_flags_start */
6712 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6715 class pass_ipa_cp : public ipa_opt_pass_d
6717 public:
6718 pass_ipa_cp (gcc::context *ctxt)
6719 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6720 ipcp_generate_summary, /* generate_summary */
6721 NULL, /* write_summary */
6722 NULL, /* read_summary */
6723 ipcp_write_transformation_summaries, /*
6724 write_optimization_summary */
6725 ipcp_read_transformation_summaries, /*
6726 read_optimization_summary */
6727 NULL, /* stmt_fixup */
6728 0, /* function_transform_todo_flags_start */
6729 ipcp_transform_function, /* function_transform */
6730 NULL) /* variable_transform */
6733 /* opt_pass methods: */
6734 bool gate (function *) final override
6736 /* FIXME: We should remove the optimize check after we ensure we never run
6737 IPA passes when not optimizing. */
6738 return (flag_ipa_cp && optimize) || in_lto_p;
6741 unsigned int execute (function *) final override { return ipcp_driver (); }
6743 }; // class pass_ipa_cp
6745 } // anon namespace
6747 ipa_opt_pass_d *
6748 make_pass_ipa_cp (gcc::context *ctxt)
6750 return new pass_ipa_cp (ctxt);
6753 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6754 within the same process. For use by toplev::finalize. */
6756 void
6757 ipa_cp_cc_finalize (void)
6759 base_count = profile_count::uninitialized ();
6760 overall_size = 0;
6761 orig_overall_size = 0;
6762 ipcp_free_transformation_sum ();