/* Interprocedural constant propagation
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good.
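   As an illustrative sketch of the two goals (the example names are made
   up):

     static int mul (int x, int factor) { return x * factor; }
     int a (int i) { return mul (i, 2); }
     int b (int i) { return mul (i, 2); }
     int c (int i) { return mul (i, 3); }

   If only a and b existed, factor would be known to be 2 in every context
   and mul itself could be modified accordingly (goal 1).  With c present,
   factor is constant only in some contexts, so a specialized clone of mul
   with factor == 2 may be created for the calls in a and b instead (goal 2).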
   The algorithm also propagates types and attempts to perform type based
   devirtualization. Types are propagated much like constants.

   The algorithm basically consists of three stages. In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites. In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created. In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D. Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D.
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   ======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site. In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.
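   As an illustrative sketch (the exact representation lives in ipa-prop.h),
   given

     extern int r (void);
     void callee (int a, int b, int c);
     void caller (int x)
     {
       callee (x + 4, 7, r ());
     }

   the call-site gets three jump functions: a pass-through of parameter x
   with a PLUS_EXPR operation for a, a constant 7 for b, and an unknown
   for c.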
   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   =======================================

   This stage is itself divided into two phases. In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original paper by Callahan et al.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions. We also record what known
   values depend on other known values and estimate local effects. Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third stage - materialization of clones, call statement updates
   ===============================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage. */
#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"

template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value. */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself. */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value. */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived. Otherwise it is NULL. */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value. */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references. */
  int index;
};
/* Common ancestor for all ipcp_value instantiations. */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone. */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees. */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone. */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees. */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice. */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter. */
  valtype value;
  /* The list of sources from which this value originates. */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice. */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values. */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources. */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created. */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values. */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number. */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call; the actual number is how many times the
     operation has been performed. In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum. */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack. */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other. */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice. */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate. TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared. BOTTOM is represented
   by a lattice with the bottom flag set. In that case, values and
   contains_variable flag should be disregarded. */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice. Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them. */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice. */
  int values_count;
  /* The lattice contains a variable component (in addition to values). */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation). */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate. */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice. */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list. Must correspond to TYPE_SIZE of all lat values. */
  HOST_WIDE_INT size;
  /* Next element of the linked list. */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For example:

     int f(int x)
     {
       return some_op (x);
     }

     int f1(int y)
     {
       if (cond)
         return f (y & 0xff);
       else
         return f (y & 0xf);
     }

   In the above case, the param 'x' will always have all the bits (except
   those in the least significant byte) set to 0. Hence the mask of 'x'
   would be 0xff; the mask reflects that the bits in the least significant
   byte are unknown.
   The actual propagated value is given by m_value & ~m_mask. */
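/* As a worked sketch of the encoding (illustrative numbers only): for the
   parameter 'x' above, the lattice would hold m_mask == 0xff and
   m_value == 0, i.e. all bits outside the low byte are known zeroes.  If
   instead every caller passed (y | 0x10) & 0xff, bit 4 would also be known:
   m_mask == 0xef and m_value == 0x10, with the constant part recovered as
   m_value & ~m_mask == 0x10.  */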
class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant. */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges. */

class ipcp_vr_lattice
{
public:
  Value_Range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const vrange &p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init (tree type);
  void print (FILE * f);

private:
  bool meet_with_1 (const vrange &other_vr);
};

inline void
ipcp_vr_lattice::init (tree type)
{
  if (type)
    m_vr.set_type (type);

  // Otherwise m_vr will default to unsupported_range.
}
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags. */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself. */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter. */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts. */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits. */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range. */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices. */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value). */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values). */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation). */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter. */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp. */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA-CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback. */

static profile_count base_count;

/* Original overall size of the program. */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map. */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO. */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO. */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic-context value of the
   Ith formal parameter of the function described by INFO. */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value. */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}
/* Return true iff X and Y should be considered equal values by IPA-CP. */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
          || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
              && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
      && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
          || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
              && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
    return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
           || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                               DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
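/* For instance (an illustrative case, not an exhaustive list), ADDR_EXPRs of
   two distinct constant-pool VAR_DECLs that were both initialized from the
   same string literal compare equal here, because their DECL_INITIALs are
   operand_equal_p even though the decls themselves differ.  */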
/* Print V which is extracted from a value in a lattice to F. */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}
/* Print a lattice LAT to F. */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F. */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}
/* Print all ipcp_lattices of all functions to F. */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them. */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, " param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, " ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, " ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, " virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, " AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, " AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE. */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned. We
     also cannot remove parameters if there are type attributes such as fnspec
     present. */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort. */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort. */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks the comdat, and for
     C++ decloned constructors, inlining is always better anyway. */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group. */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined. Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further. If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function. */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of
   NODE. */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}
/* Structure holding accumulated information about callers of a node. */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field. */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls. */
  profile_count count_sum;
  /* Sum of all frequencies for all calls. */
  sreal freq_sum;
  /* Number of calls and hot calls respectively. */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls. */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum. */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that edges
   coming from ITSELF are accumulated into rec_count_sum rather than
   count_sum. */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE. */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls ++;
      }
  return false;
}
/* Return true if NODE is a viable candidate for cloning. */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly. */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values. */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too. */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort. */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order. */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes. */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize. */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
                     constants ()
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols. */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}
/* Allocate the arrays in TOPO and topologically sort the nodes into order. */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO. */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there. */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty. */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such. */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such. */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such. */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such. */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR. */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR. Return TRUE if anything changed. */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}

/* Return true if value range information in the lattice is yet unknown. */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable. */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom. Return true if it
   previously was in a different state. */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc). This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first. */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}
/* Set lattice value to bottom, if it already isn't the case. */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already. Only meant to be called
   when switching state from TOP. */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero. */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form. */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
/* Meet operation, similar to ccp_lattice_meet: if this->m_value and VALUE
   disagree at a bit position, we want to drop that bit to varying, which is
   achieved by xoring the two values into the mask. Return true if the mask
   changed. This function assumes that the lattice value is in CONSTANT
   state. If DROP_ALL_ONES, mask out any known bits with value one
   afterwards. */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
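/* A worked sketch of the meet above (illustrative numbers only): meeting a
   lattice with m_value == 0x10, m_mask == 0x0f (low four bits unknown, bit 4
   known to be one) with value == 0x12, mask == 0 gives

     m_mask  = (0x0f | 0) | (0x10 ^ 0x12) = 0x0f
     m_value = 0x10 & ~0x0f = 0x10

   i.e. the disagreeing bit 1 was already unknown, so the lattice is
   unchanged and the function returns false.  */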
/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>. */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if CODE is a binary operation, or of bit_value_unop (other) if CODE is a
   unary op. When CODE is NOP_EXPR, no adjustment is required. If
   DROP_ALL_ONES, mask out any known bits with value one afterwards. */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand,
                              bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }
  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
        {
          adjusted_mask |= adjusted_value;
          adjusted_value &= ~adjusted_mask;
        }
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
                        drop_all_ones);
}
/* Dump the contents of the list to FILE. */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
               av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
        fprintf (f, "(by_ref)");
      if (av.killed)
        fprintf (f, "(killed)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr. */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}
/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant. */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
                        [] (const ipa_argagg_value &elt,
                            const ipa_argagg_value &val)
    {
      if (elt.index < val.index)
        return true;
      if (elt.index > val.index)
        return false;
      if (elt.unit_offset < val.unit_offset)
        return true;
      return false;
    });

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
                  || prev_index < av.index
                  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
          && av.unit_offset == unit_offset)
        slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
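/* Usage sketch with illustrative values: for a list sorted as
   { 0[0], 0[4], 2[8] } (written as index[unit_offset]), get_elt (0, 4)
   binary-searches to the second element, whereas get_elt (1, 0) lands on
   2[8], sees that the index does not match and returns NULL.  */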
/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant. */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
                        [] (const ipa_argagg_value &elt, unsigned idx)
    {
      return elt.index < idx;
    });
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant. */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant. */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
                                  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}
/* Return true if all elements present in OTHER are also present in this
   list. */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
             && (m_elts[j].index < other_index
                 || (m_elts[j].index == other_index
                     && m_elts[j].unit_offset < other_offset)))
        j++;

      if (j >= m_elts.size ()
          || m_elts[j].index != other_index
          || m_elts[j].unit_offset != other_offset
          || m_elts[j].by_ref != other.m_elts[i].by_ref
          || !m_elts[j].value
          || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
        return false;
    }
  return true;
}
/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset. */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
                                             unsigned dest_index,
                                             unsigned unit_delta,
                                             vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
        return;
      if (av->index == src_index
          && (av->unit_offset >= unit_delta)
          && av->value)
        {
          ipa_argagg_value new_av;
          gcc_checking_assert (av->value);
          new_av.value = av->value;
          new_av.unit_offset = av->unit_offset - unit_delta;
          new_av.index = dest_index;
          new_av.by_ref = av->by_ref;
          gcc_assert (!av->killed);
          new_av.killed = false;

          /* Quick check that the offsets we push are indeed increasing. */
          gcc_assert (first
                      || new_av.unit_offset > prev_unit_offset);
          prev_unit_offset = new_av.unit_offset;
          first = false;

          res->safe_push (new_av);
        }
    }
}
/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA. Return true if any item has been pushed to RES. */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
                            unsigned unit_delta,
                            vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
        && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
        ipa_argagg_value iav;
        iav.value = aglat->values->value;
        iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
        iav.index = dest_index;
        iav.by_ref = plats->aggs_by_ref;
        iav.killed = false;

        gcc_assert (first
                    || iav.unit_offset > prev_unit_offset);
        prev_unit_offset = iav.unit_offset;
        first = false;

        pushed_sth = true;
        res->safe_push (iav);
      }
  return pushed_sth;
}
/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries. */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
                        const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
        continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
             && (other[j].index < this_index
                 || (other[j].index == this_index
                     && other[j].unit_offset < this_offset)))
        j++;

      if (j >= other.length ())
        {
          elts[i].value = NULL_TREE;
          continue;
        }

      if (other[j].index == this_index
          && other[j].unit_offset == this_offset
          && other[j].by_ref == elts[i].by_ref
          && other[j].value
          && values_equal_for_ipcp_p (other[j].value, elts[i].value))
        valid_entries++;
      else
        elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
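/* Illustrative sketch of the merge-style walk above: intersecting

     ELTS  = { 0[0]=1, 0[4]=2, 1[0]=3 }
     OTHER = { 0[4]=2, 1[0]=7 }

   turns 0[0] into NULL_TREE (absent in OTHER) and 1[0] as well (the values
   differ), keeps 0[4] and returns 1.  */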
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far. */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE. */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use. */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node. Set the caller's corresponding flag. */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them. */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}
/* Initialize ipcp_lattices. */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called. We will compensate this by cloning
         later. */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, " Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so. */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           " The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
          || !ipa_get_type (info, i)
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.init (type);
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init (type);
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}
/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE. */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT. OPERAND is the 2nd operand for a binary operation. RES_TYPE
   is the type of the parameter to which the result is passed. Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant. */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
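/* For instance (illustrative only): with OPCODE PLUS_EXPR, INPUT 3 and
   OPERAND 4, fold_binary produces the invariant 7, which is what an
   arithmetic pass-through jump function for a call like f (x + 4) yields
   once x is known to be 3.  */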
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
   to which the result is passed. Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant. */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT. Return NULL_TREE if that cannot be determined. */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
           && zerop (input))
    return input;
  else
    return NULL_TREE;
}
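/* Illustrative sketch: if the constant is &obj and the ancestor jump
   function carries a 64-bit offset (say, for a conversion to a second base
   class), the tree built above is effectively &MEM_REF[&obj + 8], i.e. the
   invariant address eight bytes into obj.  A zero input is only preserved
   when the jump function's keep_null flag is set.  */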
/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it. Otherwise return NULL. INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated. PARM_TYPE is the type of the parameter to which the result is
   passed. */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
1845 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1846 that INFO describes the caller node or the one it is inlined to, CS is the
1847 call graph edge corresponding to JFUNC and CSIDX index of the described
1848 parameter. */
1850 ipa_polymorphic_call_context
1851 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1852 ipa_jump_func *jfunc)
1854 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1855 ipa_polymorphic_call_context ctx;
1856 ipa_polymorphic_call_context *edge_ctx
1857 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1859 if (edge_ctx && !edge_ctx->useless_p ())
1860 ctx = *edge_ctx;
1862 if (jfunc->type == IPA_JF_PASS_THROUGH
1863 || jfunc->type == IPA_JF_ANCESTOR)
1865 ipa_polymorphic_call_context srcctx;
1866 int srcidx;
1867 bool type_preserved = true;
1868 if (jfunc->type == IPA_JF_PASS_THROUGH)
1870 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1871 return ctx;
1872 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1873 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1875 else
1877 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1878 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1880 if (info->ipcp_orig_node)
1882 if (info->known_contexts.exists ())
1883 srcctx = info->known_contexts[srcidx];
1885 else
1887 if (!info->lattices
1888 || srcidx >= ipa_get_param_count (info))
1889 return ctx;
1890 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1891 lat = ipa_get_poly_ctx_lat (info, srcidx);
1892 if (!lat->is_single_const ())
1893 return ctx;
1894 srcctx = lat->values->value;
1896 if (srcctx.useless_p ())
1897 return ctx;
1898 if (jfunc->type == IPA_JF_ANCESTOR)
1899 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1900 if (!type_preserved)
1901 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1902 srcctx.combine_with (ctx);
1903 return srcctx;
1906 return ctx;
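/* A small illustrative scenario (hypothetical, for exposition only):

     struct A { virtual void foo (); };
     struct B : A { virtual void foo (); };
     void middle (A *a) { use (a); }
     void outer () { B b; middle (&b); }

   If the context lattice of MIDDLE's parameter holds the single context
   "instance of B" and the edge to use carries a type-preserving pass-through
   jump function, this function combines that context with any context
   recorded on the edge itself, which can later enable devirtualization.  */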
1909 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1910 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1911 the result is a range that is neither VARYING nor UNDEFINED. */
1913 static bool
1914 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1915 const vrange &src_vr,
1916 enum tree_code operation,
1917 tree dst_type, tree src_type)
1919 if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
1920 return false;
1922 range_op_handler handler (operation);
1923 if (!handler)
1924 return false;
1926 Value_Range varying (dst_type);
1927 varying.set_varying (dst_type);
1929 return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
1930 && !dst_vr.varying_p ()
1931 && !dst_vr.undefined_p ());
1934 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1935 first be extracted into a vrange. */
1937 static bool
1938 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1939 const ipa_vr &src_vr,
1940 enum tree_code operation,
1941 tree dst_type, tree src_type)
1943 Value_Range tmp;
1944 src_vr.get_vrange (tmp);
1945 return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1946 dst_type, src_type);
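/* A worked example with made-up values: converting the int range [0, 100]
   through NOP_EXPR to unsigned char yields [0, 100] again, which is neither
   VARYING nor UNDEFINED, so true is returned.  Converting a VARYING int range
   the same way yields a VARYING unsigned char range, so false is returned.  */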
1949 /* Determine the value range of JFUNC, given that INFO describes the caller
1950 node or the one it is inlined to, CS is the call graph edge corresponding
1951 to JFUNC, and PARM_TYPE is the type of the parameter. */
1953 void
1954 ipa_value_range_from_jfunc (vrange &vr,
1955 ipa_node_params *info, cgraph_edge *cs,
1956 ipa_jump_func *jfunc, tree parm_type)
1958 vr.set_undefined ();
1960 if (jfunc->m_vr)
1961 ipa_vr_operation_and_type_effects (vr,
1962 *jfunc->m_vr,
1963 NOP_EXPR, parm_type,
1964 jfunc->m_vr->type ());
1965 if (vr.singleton_p ())
1966 return;
1967 if (jfunc->type == IPA_JF_PASS_THROUGH)
1969 int idx;
1970 ipcp_transformation *sum
1971 = ipcp_get_transformation_summary (cs->caller->inlined_to
1972 ? cs->caller->inlined_to
1973 : cs->caller);
1974 if (!sum || !sum->m_vr)
1975 return;
1977 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1979 if (!(*sum->m_vr)[idx].known_p ())
1980 return;
1981 tree vr_type = ipa_get_type (info, idx);
1982 Value_Range srcvr;
1983 (*sum->m_vr)[idx].get_vrange (srcvr);
1985 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1987 if (TREE_CODE_CLASS (operation) == tcc_unary)
1989 Value_Range res (vr_type);
1991 if (ipa_vr_operation_and_type_effects (res,
1992 srcvr,
1993 operation, parm_type,
1994 vr_type))
1995 vr.intersect (res);
1997 else
1999 Value_Range op_res (vr_type);
2000 Value_Range res (vr_type);
2001 tree op = ipa_get_jf_pass_through_operand (jfunc);
2002 Value_Range op_vr (vr_type);
2003 range_op_handler handler (operation);
2005 ipa_range_set_and_normalize (op_vr, op);
2007 if (!handler
2008 || !op_res.supports_type_p (vr_type)
2009 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
2010 op_res.set_varying (vr_type);
2012 if (ipa_vr_operation_and_type_effects (res,
2013 op_res,
2014 NOP_EXPR, parm_type,
2015 vr_type))
2016 vr.intersect (res);
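/* A sketch of the binary-operation path above (numbers invented): for a
   pass-through jump function describing "x + 5" where the caller's recorded
   range of x is [0, 10],

     void caller (int x)    // known: x in [0, 10]
     {
       callee (x + 5);      // jfunc: pass-through, PLUS_EXPR, operand 5
     }

   fold_range computes [5, 15], the NOP_EXPR conversion adjusts it to the
   callee's parameter type, and VR is intersected with the result.  */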
2021 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
2022 single known constant value and if so, return it. Otherwise return NULL.
2023 NODE and INFO describe the caller node or the one it is inlined to, and
2024 its related info. */
2026 tree
2027 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
2028 const ipa_agg_jf_item *item)
2030 tree value = NULL_TREE;
2031 int src_idx;
2033 if (item->offset < 0
2034 || item->jftype == IPA_JF_UNKNOWN
2035 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
2036 return NULL_TREE;
2038 if (item->jftype == IPA_JF_CONST)
2039 return item->value.constant;
2041 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2042 || item->jftype == IPA_JF_LOAD_AGG);
2044 src_idx = item->value.pass_through.formal_id;
2046 if (info->ipcp_orig_node)
2048 if (item->jftype == IPA_JF_PASS_THROUGH)
2049 value = info->known_csts[src_idx];
2050 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2052 ipa_argagg_value_list avl (ts);
2053 value = avl.get_value (src_idx,
2054 item->value.load_agg.offset / BITS_PER_UNIT,
2055 item->value.load_agg.by_ref);
2058 else if (info->lattices)
2060 class ipcp_param_lattices *src_plats
2061 = ipa_get_parm_lattices (info, src_idx);
2063 if (item->jftype == IPA_JF_PASS_THROUGH)
2065 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2067 if (!lat->is_single_const ())
2068 return NULL_TREE;
2070 value = lat->values->value;
2072 else if (src_plats->aggs
2073 && !src_plats->aggs_bottom
2074 && !src_plats->aggs_contain_variable
2075 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2077 struct ipcp_agg_lattice *aglat;
2079 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2081 if (aglat->offset > item->value.load_agg.offset)
2082 break;
2084 if (aglat->offset == item->value.load_agg.offset)
2086 if (aglat->is_single_const ())
2087 value = aglat->values->value;
2088 break;
2094 if (!value)
2095 return NULL_TREE;
2097 if (item->jftype == IPA_JF_LOAD_AGG)
2099 tree load_type = item->value.load_agg.type;
2100 tree value_type = TREE_TYPE (value);
2102 /* Ensure value type is compatible with load type. */
2103 if (!useless_type_conversion_p (load_type, value_type))
2104 return NULL_TREE;
2107 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2108 value,
2109 item->value.pass_through.operand,
2110 item->type);
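/* An illustrative aggregate pass-through (hypothetical code): the caller
   copies its own parameter into a structure passed by reference,

     struct S { int f; };
     void caller (int c)
     {
       struct S s;
       s.f = c;            // agg item for s.f: pass-through of C
       callee (&s);
     }

   If C is known to be the constant 4 in all contexts, the item describing
   s.f evaluates to 4 here.  */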
2113 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2114 caller is inlined to) NODE, which is described by INFO, and push the
2115 results to RES as describing values passed in parameter DST_INDEX. */
2117 void
2118 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2119 ipa_agg_jump_function *agg_jfunc,
2120 unsigned dst_index,
2121 vec<ipa_argagg_value> *res)
2123 unsigned prev_unit_offset = 0;
2124 bool first = true;
2126 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2128 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2129 if (!value)
2130 continue;
2132 ipa_argagg_value iav;
2133 iav.value = value;
2134 iav.unit_offset = item.offset / BITS_PER_UNIT;
2135 iav.index = dst_index;
2136 iav.by_ref = agg_jfunc->by_ref;
2137 iav.killed = 0;
2139 gcc_assert (first
2140 || iav.unit_offset > prev_unit_offset);
2141 prev_unit_offset = iav.unit_offset;
2142 first = false;
2144 res->safe_push (iav);
2148 /* If checking is enabled, verify that no lattice is in the TOP state, i.e.
2149 simultaneously not bottom, not containing a variable component and without
2150 any known values. */
2152 DEBUG_FUNCTION void
2153 ipcp_verify_propagated_values (void)
2155 struct cgraph_node *node;
2157 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2159 ipa_node_params *info = ipa_node_params_sum->get (node);
2160 if (!opt_for_fn (node->decl, flag_ipa_cp)
2161 || !opt_for_fn (node->decl, optimize))
2162 continue;
2163 int i, count = ipa_get_param_count (info);
2165 for (i = 0; i < count; i++)
2167 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2169 if (!lat->bottom
2170 && !lat->contains_variable
2171 && lat->values_count == 0)
2173 if (dump_file)
2175 symtab->dump (dump_file);
2176 fprintf (dump_file, "\nIPA lattices after constant "
2177 "propagation, before gcc_unreachable:\n");
2178 print_all_lattices (dump_file, true, false);
2181 gcc_unreachable ();
2187 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2189 static bool
2190 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2191 ipa_polymorphic_call_context y)
2193 return x.equal_to (y);
2197 /* Add a new value source to the value represented by THIS, marking that a
2198 value comes from edge CS and (if the underlying jump function is a
2199 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2200 parameter described by SRC_IDX. OFFSET is negative if the source was the
2201 scalar value of the parameter itself; otherwise it is the offset within an aggregate. */
2203 template <typename valtype>
2204 void
2205 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2206 int src_idx, HOST_WIDE_INT offset)
2208 ipcp_value_source<valtype> *src;
2210 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2211 src->offset = offset;
2212 src->cs = cs;
2213 src->val = src_val;
2214 src->index = src_idx;
2216 src->next = sources;
2217 sources = src;
2220 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2221 CST and clear all other fields. */
2223 static ipcp_value<tree> *
2224 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2226 ipcp_value<tree> *val;
2228 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2229 val->value = cst;
2230 val->self_recursion_generated_level = same_lat_gen_level;
2231 return val;
2234 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2235 value to CTX and clear all other fields. */
2237 static ipcp_value<ipa_polymorphic_call_context> *
2238 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2239 unsigned same_lat_gen_level)
2241 ipcp_value<ipa_polymorphic_call_context> *val;
2243 val = new (ipcp_poly_ctx_values_pool.allocate ())
2244 ipcp_value<ipa_polymorphic_call_context>();
2245 val->value = ctx;
2246 val->self_recursion_generated_level = same_lat_gen_level;
2247 return val;
2250 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2251 SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
2252 meaning. OFFSET -1 means the source is scalar and not a part of an
2253 aggregate. If non-NULL, VAL_P records address of existing or newly added
2254 ipcp_value.
2256 If the value is generated for a self-recursive call as a result of an
2257 arithmetic pass-through jump-function acting on a value in the same lattice,
2258 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2259 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2261 template <typename valtype>
2262 bool
2263 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2264 ipcp_value<valtype> *src_val,
2265 int src_idx, HOST_WIDE_INT offset,
2266 ipcp_value<valtype> **val_p,
2267 unsigned same_lat_gen_level)
2269 ipcp_value<valtype> *val, *last_val = NULL;
2271 if (val_p)
2272 *val_p = NULL;
2274 if (bottom)
2275 return false;
2277 for (val = values; val; last_val = val, val = val->next)
2278 if (values_equal_for_ipcp_p (val->value, newval))
2280 if (val_p)
2281 *val_p = val;
2283 if (val->self_recursion_generated_level < same_lat_gen_level)
2284 val->self_recursion_generated_level = same_lat_gen_level;
2286 if (ipa_edge_within_scc (cs))
2288 ipcp_value_source<valtype> *s;
2289 for (s = val->sources; s; s = s->next)
2290 if (s->cs == cs && s->val == src_val)
2291 break;
2292 if (s)
2293 return false;
2296 val->add_source (cs, src_val, src_idx, offset);
2297 return false;
2300 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2301 param_ipa_cp_value_list_size))
2303 /* We can only free sources, not the values themselves, because sources
2304 of other values in this SCC might point to them. */
2305 for (val = values; val; val = val->next)
2307 while (val->sources)
2309 ipcp_value_source<valtype> *src = val->sources;
2310 val->sources = src->next;
2311 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2314 values = NULL;
2315 return set_to_bottom ();
2318 values_count++;
2319 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2320 val->add_source (cs, src_val, src_idx, offset);
2321 val->next = NULL;
2323 /* Add the new value to the end of the value list, which can reduce the
2324 number of propagation-stage iterations for recursive functions. */
2325 if (last_val)
2326 last_val->next = val;
2327 else
2328 values = val;
2330 if (val_p)
2331 *val_p = val;
2333 return true;
2336 /* A helper function that returns result of operation specified by OPCODE on
2337 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2338 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2339 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2340 the result. */
2342 static tree
2343 get_val_across_arith_op (enum tree_code opcode,
2344 tree opnd1_type,
2345 tree opnd2,
2346 ipcp_value<tree> *src_val,
2347 tree res_type)
2349 tree opnd1 = src_val->value;
2351 /* Skip source values that are incompatible with the specified type. */
2352 if (opnd1_type
2353 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2354 return NULL_TREE;
2356 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2359 /* Propagate values through an arithmetic transformation described by a jump
2360 function associated with edge CS, taking values from SRC_LAT and putting
2361 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2362 OPND2 is a constant value if the transformation is a binary operation.
2363 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2364 a part of the aggregate. SRC_IDX is the index of the source parameter.
2365 RES_TYPE is the value type of result being propagated into. Return true if
2366 DEST_LAT changed. */
2368 static bool
2369 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2370 enum tree_code opcode,
2371 tree opnd1_type,
2372 tree opnd2,
2373 ipcp_lattice<tree> *src_lat,
2374 ipcp_lattice<tree> *dest_lat,
2375 HOST_WIDE_INT src_offset,
2376 int src_idx,
2377 tree res_type)
2379 ipcp_value<tree> *src_val;
2380 bool ret = false;
2382 /* Due to circular dependencies, propagating within an SCC through an
2383 arithmetic transformation would create an infinite number of values. But
2384 for a self-feeding recursive function, we allow a limited number of
2385 propagations, which can enable a simple kind of recursive function
2386 versioning. In all other scenarios, we simply make the lattice bottom. */
2387 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2389 int i;
2391 int max_recursive_depth = opt_for_fn (cs->caller->decl,
2392 param_ipa_cp_max_recursive_depth);
2393 if (src_lat != dest_lat || max_recursive_depth < 1)
2394 return dest_lat->set_contains_variable ();
2396 /* No benefit if the probability of recursive execution is low. */
2397 if (cs->sreal_frequency () * 100
2398 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2399 param_ipa_cp_min_recursive_probability))
2400 return dest_lat->set_contains_variable ();
2402 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2404 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2406 /* We do not currently use self-recursively generated values as
2407 propagation sources. This is overly conservative, but it avoids an
2408 explosion of the lattice's value space, especially when one recursive
2409 function calls another recursive one. */
2410 if (src_val->self_recursion_generated_p ())
2412 ipcp_value_source<tree> *s;
2414 /* If the lattice has already been propagated for the call site,
2415 no need to do that again. */
2416 for (s = src_val->sources; s; s = s->next)
2417 if (s->cs == cs)
2418 return dest_lat->set_contains_variable ();
2420 else
2421 val_seeds.safe_push (src_val);
2424 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2426 /* Recursively generate lattice values with a limited count. */
2427 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2429 for (int j = 1; j < max_recursive_depth; j++)
2431 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2432 src_val, res_type);
2433 if (!cstval
2434 || !ipacp_value_safe_for_type (res_type, cstval))
2435 break;
2437 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2438 src_offset, &src_val, j);
2439 gcc_checking_assert (src_val);
2442 ret |= dest_lat->set_contains_variable ();
2444 else
2445 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2447 /* We do not use self-recursively generated values as propagation
2448 sources here either; otherwise it would be easy to overflow the value
2449 space of a normal lattice. */
2450 if (src_val->self_recursion_generated_p ())
2452 ret |= dest_lat->set_contains_variable ();
2453 continue;
2456 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2457 src_val, res_type);
2458 if (cstval
2459 && ipacp_value_safe_for_type (res_type, cstval))
2460 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2461 src_offset);
2462 else
2463 ret |= dest_lat->set_contains_variable ();
2466 return ret;
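/* The self-recursion handling above can be illustrated with a sketch
   (code and parameter values invented):

     void f (int i)
     {
       if (i < 100)
         f (i + 1);        // recursive edge: pass-through, PLUS_EXPR, 1
     }

   With the seed value 0 coming from a non-recursive caller and
   param_ipa_cp_max_recursive_depth equal to 8, the values 1, 2, ..., 7 are
   generated and added to the same lattice, each marked as self-recursion
   generated, and the lattice is also marked as containing a variable.  */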
2469 /* Propagate values through a pass-through jump function JFUNC associated with
2470 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2471 is the index of the source parameter. PARM_TYPE is the type of the
2472 parameter to which the result is passed. */
2474 static bool
2475 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2476 ipcp_lattice<tree> *src_lat,
2477 ipcp_lattice<tree> *dest_lat, int src_idx,
2478 tree parm_type)
2480 return propagate_vals_across_arith_jfunc (cs,
2481 ipa_get_jf_pass_through_operation (jfunc),
2482 NULL_TREE,
2483 ipa_get_jf_pass_through_operand (jfunc),
2484 src_lat, dest_lat, -1, src_idx, parm_type);
2487 /* Propagate values through an ancestor jump function JFUNC associated with
2488 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2489 is the index of the source parameter. */
2491 static bool
2492 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2493 struct ipa_jump_func *jfunc,
2494 ipcp_lattice<tree> *src_lat,
2495 ipcp_lattice<tree> *dest_lat, int src_idx,
2496 tree param_type)
2498 ipcp_value<tree> *src_val;
2499 bool ret = false;
2501 if (ipa_edge_within_scc (cs))
2502 return dest_lat->set_contains_variable ();
2504 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2506 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2508 if (t && ipacp_value_safe_for_type (param_type, t))
2509 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2510 else
2511 ret |= dest_lat->set_contains_variable ();
2514 return ret;
2517 /* Propagate scalar values across jump function JFUNC that is associated with
2518 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2519 parameter to which the result is passed. */
2521 static bool
2522 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2523 struct ipa_jump_func *jfunc,
2524 ipcp_lattice<tree> *dest_lat,
2525 tree param_type)
2527 if (dest_lat->bottom)
2528 return false;
2530 if (jfunc->type == IPA_JF_CONST)
2532 tree val = ipa_get_jf_constant (jfunc);
2533 if (ipacp_value_safe_for_type (param_type, val))
2534 return dest_lat->add_value (val, cs, NULL, 0);
2535 else
2536 return dest_lat->set_contains_variable ();
2538 else if (jfunc->type == IPA_JF_PASS_THROUGH
2539 || jfunc->type == IPA_JF_ANCESTOR)
2541 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2542 ipcp_lattice<tree> *src_lat;
2543 int src_idx;
2544 bool ret;
2546 if (jfunc->type == IPA_JF_PASS_THROUGH)
2547 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2548 else
2549 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2551 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2552 if (src_lat->bottom)
2553 return dest_lat->set_contains_variable ();
2555 /* If we would need to clone the caller and cannot, do not propagate. */
2556 if (!ipcp_versionable_function_p (cs->caller)
2557 && (src_lat->contains_variable
2558 || (src_lat->values_count > 1)))
2559 return dest_lat->set_contains_variable ();
2561 if (jfunc->type == IPA_JF_PASS_THROUGH)
2562 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2563 dest_lat, src_idx,
2564 param_type);
2565 else
2566 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2567 src_idx, param_type);
2569 if (src_lat->contains_variable)
2570 ret |= dest_lat->set_contains_variable ();
2572 return ret;
2575 /* TODO: We currently do not handle member method pointers in IPA-CP (we
2576 only use them for indirect inlining); we should propagate them too. */
2577 return dest_lat->set_contains_variable ();
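/* A minimal example of the constant case above (hypothetical):

     void caller (void)
     {
       callee (42);        // jfunc: IPA_JF_CONST, value 42
     }

   This propagates the constant 42 into the lattice of CALLEE's parameter,
   provided 42 is value-safe for the parameter type; otherwise the lattice is
   merely marked as containing a variable.  */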
2580 /* Propagate polymorphic contexts across jump function JFUNC, which is
2581 associated with edge CS and describes argument IDX, into DEST_LAT. */
2583 static bool
2584 propagate_context_across_jump_function (cgraph_edge *cs,
2585 ipa_jump_func *jfunc, int idx,
2586 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2588 if (dest_lat->bottom)
2589 return false;
2590 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2591 bool ret = false;
2592 bool added_sth = false;
2593 bool type_preserved = true;
2595 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2596 = ipa_get_ith_polymorhic_call_context (args, idx);
2598 if (edge_ctx_ptr)
2599 edge_ctx = *edge_ctx_ptr;
2601 if (jfunc->type == IPA_JF_PASS_THROUGH
2602 || jfunc->type == IPA_JF_ANCESTOR)
2604 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2605 int src_idx;
2606 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2608 /* TODO: Once we figure out how to propagate speculations, it will
2609 probably be a good idea to switch to speculation if type_preserved is
2610 not set instead of punting. */
2611 if (jfunc->type == IPA_JF_PASS_THROUGH)
2613 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2614 goto prop_fail;
2615 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2616 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2618 else
2620 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2621 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2624 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2625 /* If we would need to clone the caller and cannot, do not propagate. */
2626 if (!ipcp_versionable_function_p (cs->caller)
2627 && (src_lat->contains_variable
2628 || (src_lat->values_count > 1)))
2629 goto prop_fail;
2631 ipcp_value<ipa_polymorphic_call_context> *src_val;
2632 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2634 ipa_polymorphic_call_context cur = src_val->value;
2636 if (!type_preserved)
2637 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2638 if (jfunc->type == IPA_JF_ANCESTOR)
2639 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2640 /* TODO: In cases we know how the context is going to be used,
2641 we can improve the result by passing proper OTR_TYPE. */
2642 cur.combine_with (edge_ctx);
2643 if (!cur.useless_p ())
2645 if (src_lat->contains_variable
2646 && !edge_ctx.equal_to (cur))
2647 ret |= dest_lat->set_contains_variable ();
2648 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2649 added_sth = true;
2654 prop_fail:
2655 if (!added_sth)
2657 if (!edge_ctx.useless_p ())
2658 ret |= dest_lat->add_value (edge_ctx, cs);
2659 else
2660 ret |= dest_lat->set_contains_variable ();
2663 return ret;
2666 /* Propagate known bits across jump function JFUNC, which is associated with
2667 edge CS and describes argument IDX, and update DEST_LATTICE accordingly. */
2669 bool
2670 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2671 ipa_jump_func *jfunc,
2672 ipcp_bits_lattice *dest_lattice)
2674 if (dest_lattice->bottom_p ())
2675 return false;
2677 enum availability availability;
2678 cgraph_node *callee = cs->callee->function_symbol (&availability);
2679 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2680 tree parm_type = ipa_get_type (callee_info, idx);
2682 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2683 transform for these cases. Similarly, we can have bad type mismatches
2684 with LTO; avoid doing anything with those too. */
2685 if (!parm_type
2686 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2690 "param %i of %s is NULL or unsuitable for bits propagation\n",
2691 idx, cs->callee->dump_name ());
2693 return dest_lattice->set_to_bottom ();
2696 unsigned precision = TYPE_PRECISION (parm_type);
2697 signop sgn = TYPE_SIGN (parm_type);
2699 if (jfunc->type == IPA_JF_PASS_THROUGH
2700 || jfunc->type == IPA_JF_ANCESTOR)
2702 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2703 tree operand = NULL_TREE;
2704 enum tree_code code;
2705 unsigned src_idx;
2706 bool keep_null = false;
2708 if (jfunc->type == IPA_JF_PASS_THROUGH)
2710 code = ipa_get_jf_pass_through_operation (jfunc);
2711 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2712 if (code != NOP_EXPR)
2713 operand = ipa_get_jf_pass_through_operand (jfunc);
2715 else
2717 code = POINTER_PLUS_EXPR;
2718 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2719 unsigned HOST_WIDE_INT offset
2720 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2721 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2722 operand = build_int_cstu (size_type_node, offset);
2725 class ipcp_param_lattices *src_lats
2726 = ipa_get_parm_lattices (caller_info, src_idx);
2728 /* Try to propagate bits if src_lattice is bottom, but jfunc is known,
2729 e.g. consider:
2730 int f(int x)
2731 {
2732 g (x & 0xff);
2733 }
2734 Assume the lattice for x is bottom; we can still propagate the known
2735 bits of x & 0xff, which get computed during the ccp1 pass and stored
2736 in the jump function during the analysis stage. */
2738 if (!src_lats->bits_lattice.bottom_p ())
2740 bool drop_all_ones
2741 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2743 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2744 sgn, code, operand, drop_all_ones);
2748 Value_Range vr (parm_type);
2749 if (jfunc->m_vr)
2751 jfunc->m_vr->get_vrange (vr);
2752 if (!vr.undefined_p () && !vr.varying_p ())
2754 irange &r = as_a <irange> (vr);
2755 irange_bitmask bm = r.get_bitmask ();
2756 widest_int mask
2757 = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2758 widest_int value
2759 = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2760 return dest_lattice->meet_with (value, mask, precision);
2763 return dest_lattice->set_to_bottom ();
2766 /* Propagate value range across jump function JFUNC that is associated with
2767 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2768 accordingly. */
2770 static bool
2771 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2772 class ipcp_param_lattices *dest_plats,
2773 tree param_type)
2775 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2777 if (dest_lat->bottom_p ())
2778 return false;
2780 if (!param_type
2781 || (!INTEGRAL_TYPE_P (param_type)
2782 && !POINTER_TYPE_P (param_type)))
2783 return dest_lat->set_to_bottom ();
2785 if (jfunc->type == IPA_JF_PASS_THROUGH)
2787 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2788 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2789 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2790 class ipcp_param_lattices *src_lats
2791 = ipa_get_parm_lattices (caller_info, src_idx);
2792 tree operand_type = ipa_get_type (caller_info, src_idx);
2794 if (src_lats->m_value_range.bottom_p ())
2795 return dest_lat->set_to_bottom ();
2797 Value_Range vr (operand_type);
2798 if (TREE_CODE_CLASS (operation) == tcc_unary)
2799 ipa_vr_operation_and_type_effects (vr,
2800 src_lats->m_value_range.m_vr,
2801 operation, param_type,
2802 operand_type);
2803 /* A crude way to prevent an unbounded number of value range updates
2804 in SCC components. We should allow a limited number of updates within
2805 an SCC, too. */
2806 else if (!ipa_edge_within_scc (cs))
2808 tree op = ipa_get_jf_pass_through_operand (jfunc);
2809 Value_Range op_vr (TREE_TYPE (op));
2810 Value_Range op_res (operand_type);
2811 range_op_handler handler (operation);
2813 ipa_range_set_and_normalize (op_vr, op);
2815 if (!handler
2816 || !op_res.supports_type_p (operand_type)
2817 || !handler.fold_range (op_res, operand_type,
2818 src_lats->m_value_range.m_vr, op_vr))
2819 op_res.set_varying (operand_type);
2821 ipa_vr_operation_and_type_effects (vr,
2822 op_res,
2823 NOP_EXPR, param_type,
2824 operand_type);
2826 if (!vr.undefined_p () && !vr.varying_p ())
2828 if (jfunc->m_vr)
2830 Value_Range jvr (param_type);
2831 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2832 NOP_EXPR,
2833 param_type,
2834 jfunc->m_vr->type ()))
2835 vr.intersect (jvr);
2837 return dest_lat->meet_with (vr);
2840 else if (jfunc->type == IPA_JF_CONST)
2842 tree val = ipa_get_jf_constant (jfunc);
2843 if (TREE_CODE (val) == INTEGER_CST)
2845 val = fold_convert (param_type, val);
2846 if (TREE_OVERFLOW_P (val))
2847 val = drop_tree_overflow (val);
2849 Value_Range tmpvr (val, val);
2850 return dest_lat->meet_with (tmpvr);
2854 Value_Range vr (param_type);
2855 if (jfunc->m_vr
2856 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2857 param_type,
2858 jfunc->m_vr->type ()))
2859 return dest_lat->meet_with (vr);
2860 else
2861 return dest_lat->set_to_bottom ();
2864 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2865 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2866 other cases, return false). If there are no aggregate items, set
2867 aggs_by_ref to NEW_AGGS_BY_REF. */
2869 static bool
2870 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2871 bool new_aggs_by_ref)
2873 if (dest_plats->aggs)
2875 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2877 set_agg_lats_to_bottom (dest_plats);
2878 return true;
2881 else
2882 dest_plats->aggs_by_ref = new_aggs_by_ref;
2883 return false;
2886 /* Walk aggregate lattices in DEST_PLATS from **AGLAT on, until **AGLAT is an
2887 already existing lattice for the given OFFSET and SIZE, marking all skipped
2888 lattices as containing a variable and checking for overlaps. If there is no
2889 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2890 it with offset, size and contains_variable set to PRE_EXISTING, and return
2891 true, unless there are already too many, in which case return false. If
2892 there are overlaps, turn the whole DEST_PLATS to bottom and return false. If
2893 any skipped lattices were newly marked as containing a variable, set *CHANGE
2894 to true. MAX_AGG_ITEMS is the maximum number of lattices. */
2896 static bool
2897 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2898 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2899 struct ipcp_agg_lattice ***aglat,
2900 bool pre_existing, bool *change, int max_agg_items)
2902 gcc_checking_assert (offset >= 0);
2904 while (**aglat && (**aglat)->offset < offset)
2906 if ((**aglat)->offset + (**aglat)->size > offset)
2908 set_agg_lats_to_bottom (dest_plats);
2909 return false;
2911 *change |= (**aglat)->set_contains_variable ();
2912 *aglat = &(**aglat)->next;
2915 if (**aglat && (**aglat)->offset == offset)
2917 if ((**aglat)->size != val_size)
2919 set_agg_lats_to_bottom (dest_plats);
2920 return false;
2922 gcc_assert (!(**aglat)->next
2923 || (**aglat)->next->offset >= offset + val_size);
2924 return true;
2926 else
2928 struct ipcp_agg_lattice *new_al;
2930 if (**aglat && (**aglat)->offset < offset + val_size)
2932 set_agg_lats_to_bottom (dest_plats);
2933 return false;
2935 if (dest_plats->aggs_count == max_agg_items)
2936 return false;
2937 dest_plats->aggs_count++;
2938 new_al = ipcp_agg_lattice_pool.allocate ();
2939 memset (new_al, 0, sizeof (*new_al));
2941 new_al->offset = offset;
2942 new_al->size = val_size;
2943 new_al->contains_variable = pre_existing;
2945 new_al->next = **aglat;
2946 **aglat = new_al;
2947 return true;
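/* A worked sketch of the above with invented numbers: suppose DEST_PLATS
   already has lattices with (offset, size) pairs (0, 32) and (64, 32).  A
   step asking for offset 64 and size 32 first marks the skipped (0, 32)
   lattice as containing a variable and then returns true with **AGLAT being
   the existing (64, 32) lattice.  A step asking for offset 16 and size 32
   overlaps the (0, 32) lattice, so all aggregate lattices are dropped to
   bottom and false is returned.  */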
2951 /* Mark AGLAT and all other aggregate lattices reachable through its next
2952 pointers as containing an unknown value. */
2954 static bool
2955 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2957 bool ret = false;
2958 while (aglat)
2960 ret |= aglat->set_contains_variable ();
2961 aglat = aglat->next;
2963 return ret;
2966 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2967 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2968 parameter used for lattice value sources. Return true if DEST_PLATS changed
2969 in any way. */
2971 static bool
2972 merge_aggregate_lattices (struct cgraph_edge *cs,
2973 class ipcp_param_lattices *dest_plats,
2974 class ipcp_param_lattices *src_plats,
2975 int src_idx, HOST_WIDE_INT offset_delta)
2977 bool pre_existing = dest_plats->aggs != NULL;
2978 struct ipcp_agg_lattice **dst_aglat;
2979 bool ret = false;
2981 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2982 return true;
2983 if (src_plats->aggs_bottom)
2984 return set_agg_lats_contain_variable (dest_plats);
2985 if (src_plats->aggs_contain_variable)
2986 ret |= set_agg_lats_contain_variable (dest_plats);
2987 dst_aglat = &dest_plats->aggs;
2989 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2990 param_ipa_max_agg_items);
2991 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2992 src_aglat;
2993 src_aglat = src_aglat->next)
2995 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2997 if (new_offset < 0)
2998 continue;
2999 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
3000 &dst_aglat, pre_existing, &ret, max_agg_items))
3002 struct ipcp_agg_lattice *new_al = *dst_aglat;
3004 dst_aglat = &(*dst_aglat)->next;
3005 if (src_aglat->bottom)
3007 ret |= new_al->set_contains_variable ();
3008 continue;
3010 if (src_aglat->contains_variable)
3011 ret |= new_al->set_contains_variable ();
3012 for (ipcp_value<tree> *val = src_aglat->values;
3013 val;
3014 val = val->next)
3015 ret |= new_al->add_value (val->value, cs, val, src_idx,
3016 src_aglat->offset);
3018 else if (dest_plats->aggs_bottom)
3019 return true;
3021 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
3022 return ret;
3025 /* Determine whether there is anything to propagate from SRC_PLATS through a
3026 pass-through JFUNC and if so, whether it conforms to the rules about
3027 propagating values passed by reference. */
3029 static bool
3030 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
3031 struct ipa_jump_func *jfunc)
3033 return src_plats->aggs
3034 && (!src_plats->aggs_by_ref
3035 || ipa_get_jf_pass_through_agg_preserved (jfunc));
3038 /* Propagate values through ITEM, jump function for a part of an aggregate,
3039 into corresponding aggregate lattice AGLAT. CS is the call graph edge
3040 associated with the jump function. Return true if AGLAT changed in any
3041 way. */
3043 static bool
3044 propagate_aggregate_lattice (struct cgraph_edge *cs,
3045 struct ipa_agg_jf_item *item,
3046 struct ipcp_agg_lattice *aglat)
3048 class ipa_node_params *caller_info;
3049 class ipcp_param_lattices *src_plats;
3050 struct ipcp_lattice<tree> *src_lat;
3051 HOST_WIDE_INT src_offset;
3052 int src_idx;
3053 tree load_type;
3054 bool ret;
3056 if (item->jftype == IPA_JF_CONST)
3058 tree value = item->value.constant;
3060 gcc_checking_assert (is_gimple_ip_invariant (value));
3061 return aglat->add_value (value, cs, NULL, 0);
3064 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
3065 || item->jftype == IPA_JF_LOAD_AGG);
3067 caller_info = ipa_node_params_sum->get (cs->caller);
3068 src_idx = item->value.pass_through.formal_id;
3069 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3071 if (item->jftype == IPA_JF_PASS_THROUGH)
3073 load_type = NULL_TREE;
3074 src_lat = &src_plats->itself;
3075 src_offset = -1;
3077 else
3079 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3080 struct ipcp_agg_lattice *src_aglat;
3082 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3083 if (src_aglat->offset >= load_offset)
3084 break;
3086 load_type = item->value.load_agg.type;
3087 if (!src_aglat
3088 || src_aglat->offset > load_offset
3089 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3090 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3091 return aglat->set_contains_variable ();
3093 src_lat = src_aglat;
3094 src_offset = load_offset;
3097 if (src_lat->bottom
3098 || (!ipcp_versionable_function_p (cs->caller)
3099 && !src_lat->is_single_const ()))
3100 return aglat->set_contains_variable ();
3102 ret = propagate_vals_across_arith_jfunc (cs,
3103 item->value.pass_through.operation,
3104 load_type,
3105 item->value.pass_through.operand,
3106 src_lat, aglat,
3107 src_offset,
3108 src_idx,
3109 item->type);
3111 if (src_lat->contains_variable)
3112 ret |= aglat->set_contains_variable ();
3114 return ret;
3117 /* Propagate aggregate values across jump function JFUNC that is associated
3118 with edge CS and put the values into DEST_PLATS. */
3120 static bool
3121 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3122 struct ipa_jump_func *jfunc,
3123 class ipcp_param_lattices *dest_plats)
3125 bool ret = false;
3127 if (dest_plats->aggs_bottom)
3128 return false;
3130 if (jfunc->type == IPA_JF_PASS_THROUGH
3131 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3133 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3134 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3135 class ipcp_param_lattices *src_plats;
3137 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3138 if (agg_pass_through_permissible_p (src_plats, jfunc))
3140 /* Currently we do not produce clobber aggregate jump
3141 functions, replace with merging when we do. */
3142 gcc_assert (!jfunc->agg.items);
3143 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3144 src_idx, 0);
3145 return ret;
3148 else if (jfunc->type == IPA_JF_ANCESTOR
3149 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3151 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3152 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3153 class ipcp_param_lattices *src_plats;
3155 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3156 if (src_plats->aggs && src_plats->aggs_by_ref)
3158 /* Currently we do not produce clobber aggregate jump
3159 functions, replace with merging when we do. */
3160 gcc_assert (!jfunc->agg.items);
3161 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3162 ipa_get_jf_ancestor_offset (jfunc));
3164 else if (!src_plats->aggs_by_ref)
3165 ret |= set_agg_lats_to_bottom (dest_plats);
3166 else
3167 ret |= set_agg_lats_contain_variable (dest_plats);
3168 return ret;
3171 if (jfunc->agg.items)
3173 bool pre_existing = dest_plats->aggs != NULL;
3174 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3175 struct ipa_agg_jf_item *item;
3176 int i;
3178 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3179 return true;
3181 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3182 param_ipa_max_agg_items);
3183 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3185 HOST_WIDE_INT val_size;
3187 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3188 continue;
3189 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3191 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3192 &aglat, pre_existing, &ret, max_agg_items))
3194 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3195 aglat = &(*aglat)->next;
3197 else if (dest_plats->aggs_bottom)
3198 return true;
3201 ret |= set_chain_of_aglats_contains_variable (*aglat);
3203 else
3204 ret |= set_agg_lats_contain_variable (dest_plats);
3206 return ret;
3209 /* Return true if on the way from CS->caller to the final (non-alias and
3210 non-thunk) destination, the call passes through a thunk. */
3212 static bool
3213 call_passes_through_thunk (cgraph_edge *cs)
3215 cgraph_node *alias_or_thunk = cs->callee;
3216 while (alias_or_thunk->alias)
3217 alias_or_thunk = alias_or_thunk->get_alias_target ();
3218 return alias_or_thunk->thunk;
3221 /* Propagate constants from the caller to the callee of CS. INFO describes the
3222 caller. */
3224 static bool
3225 propagate_constants_across_call (struct cgraph_edge *cs)
3227 class ipa_node_params *callee_info;
3228 enum availability availability;
3229 cgraph_node *callee;
3230 class ipa_edge_args *args;
3231 bool ret = false;
3232 int i, args_count, parms_count;
3234 callee = cs->callee->function_symbol (&availability);
3235 if (!callee->definition)
3236 return false;
3237 gcc_checking_assert (callee->has_gimple_body_p ());
3238 callee_info = ipa_node_params_sum->get (callee);
3239 if (!callee_info)
3240 return false;
3242 args = ipa_edge_args_sum->get (cs);
3243 parms_count = ipa_get_param_count (callee_info);
3244 if (parms_count == 0)
3245 return false;
3246 if (!args
3247 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3248 || !opt_for_fn (cs->caller->decl, optimize))
3250 for (i = 0; i < parms_count; i++)
3251 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3252 i));
3253 return ret;
3255 args_count = ipa_get_cs_argument_count (args);
3257 /* If this call goes through a thunk we must not propagate to the first (0th)
3258 parameter. However, we might need to uncover a thunk from below a series
3259 of aliases first. */
3260 if (call_passes_through_thunk (cs))
3262 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3263 0));
3264 i = 1;
3266 else
3267 i = 0;
3269 for (; (i < args_count) && (i < parms_count); i++)
3271 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3272 class ipcp_param_lattices *dest_plats;
3273 tree param_type = ipa_get_type (callee_info, i);
3275 dest_plats = ipa_get_parm_lattices (callee_info, i);
3276 if (availability == AVAIL_INTERPOSABLE)
3277 ret |= set_all_contains_variable (dest_plats);
3278 else
3280 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3281 &dest_plats->itself,
3282 param_type);
3283 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3284 &dest_plats->ctxlat);
3285 ret
3286 |= propagate_bits_across_jump_function (cs, i, jump_func,
3287 &dest_plats->bits_lattice);
3288 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3289 dest_plats);
3290 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3291 ret |= propagate_vr_across_jump_function (cs, jump_func,
3292 dest_plats, param_type);
3293 else
3294 ret |= dest_plats->m_value_range.set_to_bottom ();
3297 for (; i < parms_count; i++)
3298 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3300 return ret;
3303 /* If an indirect edge IE can be turned into a direct one based on constants
3304 in KNOWN_CSTS, polymorphic contexts in KNOWN_CONTEXTS and aggregate values
3305 in AVS, return the destination. Store into *SPECULATIVE whether the
3306 discovered target is only a speculative guess. */
3308 static tree
3309 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3310 const vec<tree> &known_csts,
3311 const vec<ipa_polymorphic_call_context> &known_contexts,
3312 const ipa_argagg_value_list &avs,
3313 bool *speculative)
3315 int param_index = ie->indirect_info->param_index;
3316 HOST_WIDE_INT anc_offset;
3317 tree t = NULL;
3318 tree target = NULL;
3320 *speculative = false;
3322 if (param_index == -1)
3323 return NULL_TREE;
3325 if (!ie->indirect_info->polymorphic)
3327 tree t = NULL;
3329 if (ie->indirect_info->agg_contents)
3331 t = NULL;
3332 if ((unsigned) param_index < known_csts.length ()
3333 && known_csts[param_index])
3334 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3335 ie->indirect_info->offset,
3336 ie->indirect_info->by_ref);
3338 if (!t && ie->indirect_info->guaranteed_unmodified)
3339 t = avs.get_value (param_index,
3340 ie->indirect_info->offset / BITS_PER_UNIT,
3341 ie->indirect_info->by_ref);
3343 else if ((unsigned) param_index < known_csts.length ())
3344 t = known_csts[param_index];
3346 if (t
3347 && TREE_CODE (t) == ADDR_EXPR
3348 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3349 return TREE_OPERAND (t, 0);
3350 else
3351 return NULL_TREE;
3354 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3355 return NULL_TREE;
3357 gcc_assert (!ie->indirect_info->agg_contents);
3358 gcc_assert (!ie->indirect_info->by_ref);
3359 anc_offset = ie->indirect_info->offset;
3361 t = NULL;
3363 if ((unsigned) param_index < known_csts.length ()
3364 && known_csts[param_index])
3365 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3366 ie->indirect_info->offset, true);
3368 /* Try to work out the value of the virtual table pointer from replacements
3369 or known aggregate values. */
3370 if (!t)
3371 t = avs.get_value (param_index,
3372 ie->indirect_info->offset / BITS_PER_UNIT,
3373 true);
3375 /* If we found the virtual table pointer, lookup the target. */
3376 if (t)
3378 tree vtable;
3379 unsigned HOST_WIDE_INT offset;
3380 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3382 bool can_refer;
3383 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3384 vtable, offset, &can_refer);
3385 if (can_refer)
3387 if (!target
3388 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3389 || !possible_polymorphic_call_target_p
3390 (ie, cgraph_node::get (target)))
3392 /* Do not speculate builtin_unreachable, it is stupid! */
3393 if (ie->indirect_info->vptr_changed)
3394 return NULL;
3395 target = ipa_impossible_devirt_target (ie, target);
3397 *speculative = ie->indirect_info->vptr_changed;
3398 if (!*speculative)
3399 return target;
3404 /* Do we know the constant value of pointer? */
3405 if (!t && (unsigned) param_index < known_csts.length ())
3406 t = known_csts[param_index];
3408 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3410 ipa_polymorphic_call_context context;
3411 if (known_contexts.length () > (unsigned int) param_index)
3413 context = known_contexts[param_index];
3414 context.offset_by (anc_offset);
3415 if (ie->indirect_info->vptr_changed)
3416 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3417 ie->indirect_info->otr_type);
3418 if (t)
3420 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3421 (t, ie->indirect_info->otr_type, anc_offset);
3422 if (!ctx2.useless_p ())
3423 context.combine_with (ctx2, ie->indirect_info->otr_type);
3426 else if (t)
3428 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3429 anc_offset);
3430 if (ie->indirect_info->vptr_changed)
3431 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3432 ie->indirect_info->otr_type);
3434 else
3435 return NULL_TREE;
3437 vec <cgraph_node *>targets;
3438 bool final;
3440 targets = possible_polymorphic_call_targets
3441 (ie->indirect_info->otr_type,
3442 ie->indirect_info->otr_token,
3443 context, &final);
3444 if (!final || targets.length () > 1)
3446 struct cgraph_node *node;
3447 if (*speculative)
3448 return target;
3449 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3450 || ie->speculative || !ie->maybe_hot_p ())
3451 return NULL;
3452 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3453 ie->indirect_info->otr_token,
3454 context);
3455 if (node)
3457 *speculative = true;
3458 target = node->decl;
3460 else
3461 return NULL;
3463 else
3465 *speculative = false;
3466 if (targets.length () == 1)
3467 target = targets[0]->decl;
3468 else
3469 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3472 if (target && !possible_polymorphic_call_target_p (ie,
3473 cgraph_node::get (target)))
3475 if (*speculative)
3476 return NULL;
3477 target = ipa_impossible_devirt_target (ie, target);
3480 return target;
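/* A compact illustration of the polymorphic path above (hypothetical types):

     struct A { virtual int foo (); };
     struct B : A { virtual int foo (); };
     void callee (A *a) { a->foo (); }   // indirect edge IE
     void caller () { static B b; callee (&b); }

   If IPA-CP knows the argument is &b, the virtual table pointer can be read
   from the initializer, gimple_get_virt_method_for_vtable then identifies
   B::foo, and the indirect edge can be turned into a direct call to it,
   speculatively if the vtable pointer may have changed.  */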
3483 /* If an indirect edge IE can be turned into a direct one based on data in
3484 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3485 whether the discovered target is only a speculative guess. */
3487 tree
3488 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3489 ipa_call_arg_values *avals,
3490 bool *speculative)
3492 ipa_argagg_value_list avl (avals);
3493 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3494 avals->m_known_contexts,
3495 avl, speculative);
3498 /* Calculate devirtualization time bonus for NODE, assuming we know information
3499 about arguments stored in AVALS. */
3501 static int
3502 devirtualization_time_bonus (struct cgraph_node *node,
3503 ipa_auto_call_arg_values *avals)
3505 struct cgraph_edge *ie;
3506 int res = 0;
3508 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3510 struct cgraph_node *callee;
3511 class ipa_fn_summary *isummary;
3512 enum availability avail;
3513 tree target;
3514 bool speculative;
3516 ipa_argagg_value_list avl (avals);
3517 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3518 avals->m_known_contexts,
3519 avl, &speculative);
3520 if (!target)
3521 continue;
3523 /* Only bare minimum benefit for clearly un-inlineable targets. */
3524 res += 1;
3525 callee = cgraph_node::get (target);
3526 if (!callee || !callee->definition)
3527 continue;
3528 callee = callee->function_symbol (&avail);
3529 if (avail < AVAIL_AVAILABLE)
3530 continue;
3531 isummary = ipa_fn_summaries->get (callee);
3532 if (!isummary || !isummary->inlinable)
3533 continue;
3535 int size = ipa_size_summaries->get (callee)->size;
3536 /* FIXME: The values below need re-considering and perhaps also
3537 integrating into the cost metrics, at least in some very basic way. */
3538 int max_inline_insns_auto
3539 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3540 if (size <= max_inline_insns_auto / 4)
3541 res += 31 / ((int)speculative + 1);
3542 else if (size <= max_inline_insns_auto / 2)
3543 res += 15 / ((int)speculative + 1);
3544 else if (size <= max_inline_insns_auto
3545 || DECL_DECLARED_INLINE_P (callee->decl))
3546 res += 7 / ((int)speculative + 1);
3549 return res;
3552 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3554 static int
3555 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3557 int result = 0;
3558 ipa_hints hints = estimates.hints;
3559 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3560 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3562 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3564 if (hints & INLINE_HINT_loop_iterations)
3565 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3567 if (hints & INLINE_HINT_loop_stride)
3568 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3570 return result;
3573 /* If there is a reason to penalize the function described by INFO in the
3574 cloning goodness evaluation, do so. */
3576 static inline sreal
3577 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3578 sreal evaluation)
3580 if (info->node_within_scc && !info->node_is_self_scc)
3581 evaluation = (evaluation
3582 * (100 - opt_for_fn (node->decl,
3583 param_ipa_cp_recursion_penalty))) / 100;
3585 if (info->node_calling_single_call)
3586 evaluation = (evaluation
3587 * (100 - opt_for_fn (node->decl,
3588 param_ipa_cp_single_call_penalty)))
3589 / 100;
3591 return evaluation;
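/* Worked numbers for the above (all values invented): with an EVALUATION of
   1000, a node in a non-self SCC and param_ipa_cp_recursion_penalty of 40,
   the first adjustment yields 1000 * (100 - 40) / 100 == 600; if the node
   also calls a single call site and param_ipa_cp_single_call_penalty is 15,
   the result becomes 600 * (100 - 15) / 100 == 510.  */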
3594 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3595 and SIZE_COST and with the sum of frequencies of incoming edges to the
3596 potential new clone in FREQ_SUM and their profile count sum in COUNT_SUM. */
3598 static bool
3599 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3600 sreal freq_sum, profile_count count_sum,
3601 int size_cost)
3603 if (time_benefit == 0
3604 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3605 || node->optimize_for_size_p ())
3606 return false;
3608 gcc_assert (size_cost > 0);
3610 ipa_node_params *info = ipa_node_params_sum->get (node);
3611 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3612 if (count_sum.nonzero_p ())
3614 gcc_assert (base_count.nonzero_p ());
3615 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3616 sreal evaluation = (time_benefit * factor) / size_cost;
3617 evaluation = incorporate_penalties (node, info, evaluation);
3618 evaluation *= 1000;
3620 if (dump_file && (dump_flags & TDF_DETAILS))
3622 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3623 "size: %i, count_sum: ", time_benefit.to_double (),
3624 size_cost);
3625 count_sum.dump (dump_file);
3626 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3627 info->node_within_scc
3628 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3629 info->node_calling_single_call ? ", single_call" : "",
3630 evaluation.to_double (), eval_threshold);
3633 return evaluation.to_int () >= eval_threshold;
3635 else
3637 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3638 evaluation = incorporate_penalties (node, info, evaluation);
3639 evaluation *= 1000;
3641 if (dump_file && (dump_flags & TDF_DETAILS))
3642 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3643 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3644 "threshold: %i\n",
3645 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3646 info->node_within_scc
3647 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3648 info->node_calling_single_call ? ", single_call" : "",
3649 evaluation.to_double (), eval_threshold);
3651 return evaluation.to_int () >= eval_threshold;
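/* A worked example of the frequency-based branch above (inputs invented):
   with TIME_BENEFIT == 4, FREQ_SUM == 2 and SIZE_COST == 16, and assuming no
   penalties apply, the evaluation is (4 * 2) / 16 * 1000 == 500, and cloning
   is considered worthwhile iff 500 >= param_ipa_cp_eval_threshold.  */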
3655 /* Grow vectors in AVALS and fill them with information about values of
3656 parameters that are known to be independent of the context. Only calculate
3657 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3658 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3659 parameters will be stored in it.
3661 TODO: Also grow context independent value range vectors. */
3663 static bool
3664 gather_context_independent_values (class ipa_node_params *info,
3665 ipa_auto_call_arg_values *avals,
3666 bool calculate_aggs,
3667 int *removable_params_cost)
3669 int i, count = ipa_get_param_count (info);
3670 bool ret = false;
3672 avals->m_known_vals.safe_grow_cleared (count, true);
3673 avals->m_known_contexts.safe_grow_cleared (count, true);
3675 if (removable_params_cost)
3676 *removable_params_cost = 0;
3678 for (i = 0; i < count; i++)
3680 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3681 ipcp_lattice<tree> *lat = &plats->itself;
3683 if (lat->is_single_const ())
3685 ipcp_value<tree> *val = lat->values;
3686 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3687 avals->m_known_vals[i] = val->value;
3688 if (removable_params_cost)
3689 *removable_params_cost
3690 += estimate_move_cost (TREE_TYPE (val->value), false);
3691 ret = true;
3693 else if (removable_params_cost
3694 && !ipa_is_param_used (info, i))
3695 *removable_params_cost
3696 += ipa_get_param_move_cost (info, i);
3698 if (!ipa_is_param_used (info, i))
3699 continue;
3701 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3702 /* Do not account known context as reason for cloning. We can see
3703 if it permits devirtualization. */
3704 if (ctxlat->is_single_const ())
3705 avals->m_known_contexts[i] = ctxlat->values->value;
3707 if (calculate_aggs)
3708 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3711 return ret;
3714 /* Perform time and size measurement of NODE with the context given in AVALS,
3715 calculate the benefit compared to the node without specialization and store
3716 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3717 context-independent or unused removable parameters and EST_MOVE_COST, the
3718 estimated movement cost of the considered parameter. */
3720 static void
3721 perform_estimation_of_a_value (cgraph_node *node,
3722 ipa_auto_call_arg_values *avals,
3723 int removable_params_cost, int est_move_cost,
3724 ipcp_value_base *val)
3726 sreal time_benefit;
3727 ipa_call_estimates estimates;
3729 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3731 /* Extern inline functions have no cloning local time benefits because they
3732 will be inlined anyway. The only reason to clone them is if it enables
3733 optimization in any of the functions they call. */
3734 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3735 time_benefit = 0;
3736 else
3737 time_benefit = (estimates.nonspecialized_time - estimates.time)
3738 + (devirtualization_time_bonus (node, avals)
3739 + hint_time_bonus (node, estimates)
3740 + removable_params_cost + est_move_cost);
3742 int size = estimates.size;
3743 gcc_checking_assert (size >= 0);
3744 /* The inliner-heuristics based estimates may think that in certain
3745 contexts some functions do not have any size at all but we want
3746 all specializations to have at least a tiny cost, if only to avoid
3747 dividing by zero. */
3748 if (size == 0)
3749 size = 1;
3751 val->local_time_benefit = time_benefit;
3752 val->local_size_cost = size;
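/* For illustration, with hypothetical numbers: if the specialized clone is
   estimated to take 80 time units instead of 100, gets a devirtualization
   bonus of 4 and a hint bonus of 2, and REMOVABLE_PARAMS_COST and
   EST_MOVE_COST are 3 and 1 respectively, the recorded local time benefit
   is (100 - 80) + (4 + 2 + 3 + 1) = 30 time units. */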
3755 /* Get the overall limit of unit growth based on parameters extracted from
3756 NODE. It does not really make sense to mix functions with different overall
3757 growth limits but it is possible, and if it happens, we do not want to
3758 select one limit at random. */
3760 static long
3761 get_max_overall_size (cgraph_node *node)
3763 long max_new_size = orig_overall_size;
3764 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3765 if (max_new_size < large_unit)
3766 max_new_size = large_unit;
3767 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3768 max_new_size += max_new_size * unit_growth / 100 + 1;
3769 return max_new_size;
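/* For illustration, with hypothetical parameter values: if orig_overall_size
   is 20000, param_ipa_cp_large_unit_insns is 16000 and
   param_ipa_cp_unit_growth is 10, the base stays at 20000 and the returned
   limit is 20000 + 20000 * 10 / 100 + 1 = 22001 instructions. */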
3772 /* Return true if NODE should be cloned just for a parameter removal, possibly
3773 dumping a reason if not. */
3775 static bool
3776 clone_for_param_removal_p (cgraph_node *node)
3778 if (!node->can_change_signature)
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, " Not considering cloning to remove parameters, "
3782 "function cannot change signature.\n");
3783 return false;
3785 if (node->can_be_local_p ())
3787 if (dump_file && (dump_flags & TDF_DETAILS))
3788 fprintf (dump_file, " Not considering cloning to remove parameters, "
3789 "IPA-SRA can do it potentially better.\n");
3790 return false;
3792 return true;
3795 /* Iterate over known values of parameters of NODE and estimate the local
3796 effects in terms of time and size they have. */
3798 static void
3799 estimate_local_effects (struct cgraph_node *node)
3801 ipa_node_params *info = ipa_node_params_sum->get (node);
3802 int count = ipa_get_param_count (info);
3803 bool always_const;
3804 int removable_params_cost;
3806 if (!count || !ipcp_versionable_function_p (node))
3807 return;
3809 if (dump_file && (dump_flags & TDF_DETAILS))
3810 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3812 ipa_auto_call_arg_values avals;
3813 always_const = gather_context_independent_values (info, &avals, true,
3814 &removable_params_cost);
3815 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3816 if (always_const || devirt_bonus
3817 || (removable_params_cost && clone_for_param_removal_p (node)))
3819 struct caller_statistics stats;
3820 ipa_call_estimates estimates;
3822 init_caller_stats (&stats);
3823 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3824 false);
3825 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3826 sreal time = estimates.nonspecialized_time - estimates.time;
3827 time += devirt_bonus;
3828 time += hint_time_bonus (node, estimates);
3829 time += removable_params_cost;
3830 int size = estimates.size - stats.n_calls * removable_params_cost;
3832 if (dump_file)
3833 fprintf (dump_file, " - context independent values, size: %i, "
3834 "time_benefit: %f\n", size, (time).to_double ());
3836 if (size <= 0 || node->local)
3838 info->do_clone_for_all_contexts = true;
3840 if (dump_file)
3841 fprintf (dump_file, " Decided to specialize for all "
3842 "known contexts, code not going to grow.\n");
3844 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3845 stats.count_sum, size))
3847 if (size + overall_size <= get_max_overall_size (node))
3849 info->do_clone_for_all_contexts = true;
3850 overall_size += size;
3852 if (dump_file)
3853 fprintf (dump_file, " Decided to specialize for all "
3854 "known contexts, growth (to %li) deemed "
3855 "beneficial.\n", overall_size);
3857 else if (dump_file && (dump_flags & TDF_DETAILS))
3858 fprintf (dump_file, " Not cloning for all contexts because "
3859 "maximum unit size would be reached with %li.\n",
3860 size + overall_size);
3862 else if (dump_file && (dump_flags & TDF_DETAILS))
3863 fprintf (dump_file, " Not cloning for all contexts because "
3864 "!good_cloning_opportunity_p.\n");
3868 for (int i = 0; i < count; i++)
3870 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3871 ipcp_lattice<tree> *lat = &plats->itself;
3872 ipcp_value<tree> *val;
3874 if (lat->bottom
3875 || !lat->values
3876 || avals.m_known_vals[i])
3877 continue;
3879 for (val = lat->values; val; val = val->next)
3881 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3882 avals.m_known_vals[i] = val->value;
3884 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3885 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3886 emc, val);
3888 if (dump_file && (dump_flags & TDF_DETAILS))
3890 fprintf (dump_file, " - estimates for value ");
3891 print_ipcp_constant_value (dump_file, val->value);
3892 fprintf (dump_file, " for ");
3893 ipa_dump_param (dump_file, info, i);
3894 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3895 val->local_time_benefit.to_double (),
3896 val->local_size_cost);
3899 avals.m_known_vals[i] = NULL_TREE;
3902 for (int i = 0; i < count; i++)
3904 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3906 if (!plats->virt_call)
3907 continue;
3909 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3910 ipcp_value<ipa_polymorphic_call_context> *val;
3912 if (ctxlat->bottom
3913 || !ctxlat->values
3914 || !avals.m_known_contexts[i].useless_p ())
3915 continue;
3917 for (val = ctxlat->values; val; val = val->next)
3919 avals.m_known_contexts[i] = val->value;
3920 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3921 0, val);
3923 if (dump_file && (dump_flags & TDF_DETAILS))
3925 fprintf (dump_file, " - estimates for polymorphic context ");
3926 print_ipcp_constant_value (dump_file, val->value);
3927 fprintf (dump_file, " for ");
3928 ipa_dump_param (dump_file, info, i);
3929 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3930 val->local_time_benefit.to_double (),
3931 val->local_size_cost);
3934 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3937 unsigned all_ctx_len = avals.m_known_aggs.length ();
3938 auto_vec<ipa_argagg_value, 32> all_ctx;
3939 all_ctx.reserve_exact (all_ctx_len);
3940 all_ctx.splice (avals.m_known_aggs);
3941 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3943 unsigned j = 0;
3944 for (int index = 0; index < count; index++)
3946 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3948 if (plats->aggs_bottom || !plats->aggs)
3949 continue;
3951 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3953 ipcp_value<tree> *val;
3954 if (aglat->bottom || !aglat->values
3955 /* If the following is true, the one value is already part of all
3956 context estimations. */
3957 || (!plats->aggs_contain_variable
3958 && aglat->is_single_const ()))
3959 continue;
3961 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3962 while (j < all_ctx_len
3963 && (all_ctx[j].index < index
3964 || (all_ctx[j].index == index
3965 && all_ctx[j].unit_offset < unit_offset)))
3967 avals.m_known_aggs[j] = all_ctx[j];
3968 j++;
3971 for (unsigned k = j; k < all_ctx_len; k++)
3972 avals.m_known_aggs[k+1] = all_ctx[k];
3974 for (val = aglat->values; val; val = val->next)
3976 avals.m_known_aggs[j].value = val->value;
3977 avals.m_known_aggs[j].unit_offset = unit_offset;
3978 avals.m_known_aggs[j].index = index;
3979 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3980 avals.m_known_aggs[j].killed = false;
3982 perform_estimation_of_a_value (node, &avals,
3983 removable_params_cost, 0, val);
3985 if (dump_file && (dump_flags & TDF_DETAILS))
3987 fprintf (dump_file, " - estimates for value ");
3988 print_ipcp_constant_value (dump_file, val->value);
3989 fprintf (dump_file, " for ");
3990 ipa_dump_param (dump_file, info, index);
3991 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3992 "]: time_benefit: %g, size: %i\n",
3993 plats->aggs_by_ref ? "ref " : "",
3994 aglat->offset,
3995 val->local_time_benefit.to_double (),
3996 val->local_size_cost);
4004 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
4005 topological sort of values. */
4007 template <typename valtype>
4008 void
4009 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
4011 ipcp_value_source<valtype> *src;
4013 if (cur_val->dfs)
4014 return;
4016 dfs_counter++;
4017 cur_val->dfs = dfs_counter;
4018 cur_val->low_link = dfs_counter;
4020 cur_val->topo_next = stack;
4021 stack = cur_val;
4022 cur_val->on_stack = true;
4024 for (src = cur_val->sources; src; src = src->next)
4025 if (src->val)
4027 if (src->val->dfs == 0)
4029 add_val (src->val);
4030 if (src->val->low_link < cur_val->low_link)
4031 cur_val->low_link = src->val->low_link;
4033 else if (src->val->on_stack
4034 && src->val->dfs < cur_val->low_link)
4035 cur_val->low_link = src->val->dfs;
4038 if (cur_val->dfs == cur_val->low_link)
4040 ipcp_value<valtype> *v, *scc_list = NULL;
4044 v = stack;
4045 stack = v->topo_next;
4046 v->on_stack = false;
4047 v->scc_no = cur_val->dfs;
4049 v->scc_next = scc_list;
4050 scc_list = v;
4052 while (v != cur_val);
4054 cur_val->topo_next = values_topo;
4055 values_topo = cur_val;
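/* A small illustration of the Tarjan SCC walk above (hypothetical values):
   if value A has source B and B in turn has source A, e.g. because two
   mutually recursive functions keep handing the two constants to each other,
   then add_val (A) assigns dfs 1 to A, recurses into B (dfs 2), finds the
   back-edge to A still on the stack and lowers B's low_link to 1. Back in A,
   dfs == low_link, so both values are popped from the stack as one SCC and
   prepended to values_topo. */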
4059 /* Add all values in lattices associated with NODE to the topological sort if
4060 they are not there yet. */
4062 static void
4063 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
4065 ipa_node_params *info = ipa_node_params_sum->get (node);
4066 int i, count = ipa_get_param_count (info);
4068 for (i = 0; i < count; i++)
4070 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
4071 ipcp_lattice<tree> *lat = &plats->itself;
4072 struct ipcp_agg_lattice *aglat;
4074 if (!lat->bottom)
4076 ipcp_value<tree> *val;
4077 for (val = lat->values; val; val = val->next)
4078 topo->constants.add_val (val);
4081 if (!plats->aggs_bottom)
4082 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4083 if (!aglat->bottom)
4085 ipcp_value<tree> *val;
4086 for (val = aglat->values; val; val = val->next)
4087 topo->constants.add_val (val);
4090 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
4091 if (!ctxlat->bottom)
4093 ipcp_value<ipa_polymorphic_call_context> *ctxval;
4094 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
4095 topo->contexts.add_val (ctxval);
4100 /* One pass of constant propagation along the call graph edges, from callers
4101 to callees (requires topological ordering in TOPO), iterating over strongly
4102 connected components. */
4104 static void
4105 propagate_constants_topo (class ipa_topo_info *topo)
4107 int i;
4109 for (i = topo->nnodes - 1; i >= 0; i--)
4111 unsigned j;
4112 struct cgraph_node *v, *node = topo->order[i];
4113 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
4115 /* First, iteratively propagate within the strongly connected component
4116 until all lattices stabilize. */
4117 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4118 if (v->has_gimple_body_p ())
4120 if (opt_for_fn (v->decl, flag_ipa_cp)
4121 && opt_for_fn (v->decl, optimize))
4122 push_node_to_stack (topo, v);
4123 /* When V is not optimized, we cannot push it to the stack, but
4124 we still need to set all its callees' lattices to bottom. */
4125 else
4127 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4128 propagate_constants_across_call (cs);
4132 v = pop_node_from_stack (topo);
4133 while (v)
4135 struct cgraph_edge *cs;
4136 class ipa_node_params *info = NULL;
4137 bool self_scc = true;
4139 for (cs = v->callees; cs; cs = cs->next_callee)
4140 if (ipa_edge_within_scc (cs))
4142 cgraph_node *callee = cs->callee->function_symbol ();
4144 if (v != callee)
4145 self_scc = false;
4147 if (!info)
4149 info = ipa_node_params_sum->get (v);
4150 info->node_within_scc = true;
4153 if (propagate_constants_across_call (cs))
4154 push_node_to_stack (topo, callee);
4157 if (info)
4158 info->node_is_self_scc = self_scc;
4160 v = pop_node_from_stack (topo);
4163 /* Afterwards, propagate along edges leading out of the SCC, calculate
4164 the local effects of the discovered constants and add all valid values
4165 to their topological sort. */
4166 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4167 if (v->has_gimple_body_p ()
4168 && opt_for_fn (v->decl, flag_ipa_cp)
4169 && opt_for_fn (v->decl, optimize))
4171 struct cgraph_edge *cs;
4173 estimate_local_effects (v);
4174 add_all_node_vals_to_toposort (v, topo);
4175 for (cs = v->callees; cs; cs = cs->next_callee)
4176 if (!ipa_edge_within_scc (cs))
4177 propagate_constants_across_call (cs);
4179 cycle_nodes.release ();
4183 /* Propagate the estimated effects of individual values along the topological
4184 order, from the dependent values to those they depend on. */
4186 template <typename valtype>
4187 void
4188 value_topo_info<valtype>::propagate_effects ()
4190 ipcp_value<valtype> *base;
4191 hash_set<ipcp_value<valtype> *> processed_srcvals;
4193 for (base = values_topo; base; base = base->topo_next)
4195 ipcp_value_source<valtype> *src;
4196 ipcp_value<valtype> *val;
4197 sreal time = 0;
4198 HOST_WIDE_INT size = 0;
4200 for (val = base; val; val = val->scc_next)
4202 time = time + val->local_time_benefit + val->prop_time_benefit;
4203 size = size + val->local_size_cost + val->prop_size_cost;
4206 for (val = base; val; val = val->scc_next)
4208 processed_srcvals.empty ();
4209 for (src = val->sources; src; src = src->next)
4210 if (src->val
4211 && src->cs->maybe_hot_p ())
4213 if (!processed_srcvals.add (src->val))
4215 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4216 if (prop_size < INT_MAX)
4217 src->val->prop_size_cost = prop_size;
4218 else
4219 continue;
4222 int special_factor = 1;
4223 if (val->same_scc (src->val))
4224 special_factor
4225 = opt_for_fn (src->cs->caller->decl,
4226 param_ipa_cp_recursive_freq_factor);
4227 else if (val->self_recursion_generated_p ()
4228 && (src->cs->callee->function_symbol ()
4229 == src->cs->caller))
4231 int max_recur_gen_depth
4232 = opt_for_fn (src->cs->caller->decl,
4233 param_ipa_cp_max_recursive_depth);
4234 special_factor = max_recur_gen_depth
4235 - val->self_recursion_generated_level + 1;
4238 src->val->prop_time_benefit
4239 += time * special_factor * src->cs->sreal_frequency ();
4242 if (size < INT_MAX)
4244 val->prop_time_benefit = time;
4245 val->prop_size_cost = size;
4247 else
4249 val->prop_time_benefit = 0;
4250 val->prop_size_cost = 0;
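/* For illustration, with hypothetical numbers: if an SCC consists of a single
   value whose local and propagated time benefits sum to 10 time units, and it
   has one hot source value reached over an edge with sreal frequency 2 that
   is neither in the same SCC nor self-recursion-generated, then
   special_factor stays 1 and the source value's prop_time_benefit grows by
   10 * 1 * 2 = 20 time units. */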
4256 /* Callback for qsort to sort counts of all edges in descending order. */
4258 static int
4259 compare_edge_profile_counts (const void *a, const void *b)
4261 const profile_count *cnt1 = (const profile_count *) a;
4262 const profile_count *cnt2 = (const profile_count *) b;
4264 if (*cnt1 < *cnt2)
4265 return 1;
4266 if (*cnt1 > *cnt2)
4267 return -1;
4268 return 0;
4272 /* Propagate constants, polymorphic contexts and their effects from the
4273 summaries interprocedurally. */
4275 static void
4276 ipcp_propagate_stage (class ipa_topo_info *topo)
4278 struct cgraph_node *node;
4280 if (dump_file)
4281 fprintf (dump_file, "\n Propagating constants:\n\n");
4283 base_count = profile_count::uninitialized ();
4285 bool compute_count_base = false;
4286 unsigned base_count_pos_percent = 0;
4287 FOR_EACH_DEFINED_FUNCTION (node)
4289 if (node->has_gimple_body_p ()
4290 && opt_for_fn (node->decl, flag_ipa_cp)
4291 && opt_for_fn (node->decl, optimize))
4293 ipa_node_params *info = ipa_node_params_sum->get (node);
4294 determine_versionability (node, info);
4296 unsigned nlattices = ipa_get_param_count (info);
4297 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4298 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4299 initialize_node_lattices (node);
4301 ipa_size_summary *s = ipa_size_summaries->get (node);
4302 if (node->definition && !node->alias && s != NULL)
4303 overall_size += s->self_size;
4304 if (node->count.ipa ().initialized_p ())
4306 compute_count_base = true;
4307 unsigned pos_percent = opt_for_fn (node->decl,
4308 param_ipa_cp_profile_count_base);
4309 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4313 if (compute_count_base)
4315 auto_vec<profile_count> all_edge_counts;
4316 all_edge_counts.reserve_exact (symtab->edges_count);
4317 FOR_EACH_DEFINED_FUNCTION (node)
4318 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4320 profile_count count = cs->count.ipa ();
4321 if (!count.nonzero_p ())
4322 continue;
4324 enum availability avail;
4325 cgraph_node *tgt
4326 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4327 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4328 if (info && info->versionable)
4329 all_edge_counts.quick_push (count);
4332 if (!all_edge_counts.is_empty ())
4334 gcc_assert (base_count_pos_percent <= 100);
4335 all_edge_counts.qsort (compare_edge_profile_counts);
4337 unsigned base_count_pos
4338 = (all_edge_counts.length () * base_count_pos_percent) / 100;
4339 base_count = all_edge_counts[base_count_pos];
4341 if (dump_file)
4343 fprintf (dump_file, "\nSelected base_count from %u edges at "
4344 "position %u, arriving at: ", all_edge_counts.length (),
4345 base_count_pos);
4346 base_count.dump (dump_file);
4347 fprintf (dump_file, "\n");
4350 else if (dump_file)
4351 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4352 "continuing as if without profile feedback.\n");
4355 orig_overall_size = overall_size;
4357 if (dump_file)
4358 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4360 propagate_constants_topo (topo);
4361 if (flag_checking)
4362 ipcp_verify_propagated_values ();
4363 topo->constants.propagate_effects ();
4364 topo->contexts.propagate_effects ();
4366 if (dump_file)
4368 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4369 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4373 /* Discover newly direct outgoing edges from NODE which is a new clone with
4374 known KNOWN_CSTS and make them direct. */
4376 static void
4377 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4378 vec<tree> known_csts,
4379 vec<ipa_polymorphic_call_context>
4380 known_contexts,
4381 vec<ipa_argagg_value, va_gc> *aggvals)
4383 struct cgraph_edge *ie, *next_ie;
4384 bool found = false;
4386 for (ie = node->indirect_calls; ie; ie = next_ie)
4388 tree target;
4389 bool speculative;
4391 next_ie = ie->next_callee;
4392 ipa_argagg_value_list avs (aggvals);
4393 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4394 avs, &speculative);
4395 if (target)
4397 bool agg_contents = ie->indirect_info->agg_contents;
4398 bool polymorphic = ie->indirect_info->polymorphic;
4399 int param_index = ie->indirect_info->param_index;
4400 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4401 speculative);
4402 found = true;
4404 if (cs && !agg_contents && !polymorphic)
4406 ipa_node_params *info = ipa_node_params_sum->get (node);
4407 int c = ipa_get_controlled_uses (info, param_index);
4408 if (c != IPA_UNDESCRIBED_USE
4409 && !ipa_get_param_load_dereferenced (info, param_index))
4411 struct ipa_ref *to_del;
4413 c--;
4414 ipa_set_controlled_uses (info, param_index, c);
4415 if (dump_file && (dump_flags & TDF_DETAILS))
4416 fprintf (dump_file, " controlled uses count of param "
4417 "%i bumped down to %i\n", param_index, c);
4418 if (c == 0
4419 && (to_del = node->find_reference (cs->callee, NULL, 0,
4420 IPA_REF_ADDR)))
4422 if (dump_file && (dump_flags & TDF_DETAILS))
4423 fprintf (dump_file, " and even removing its "
4424 "cloning-created reference\n");
4425 to_del->remove_reference ();
4431 /* Turning indirect calls into direct ones will improve the overall summary. */
4432 if (found)
4433 ipa_update_overall_fn_summary (node);
4436 class edge_clone_summary;
4437 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4439 /* Edge clone summary. */
4441 class edge_clone_summary
4443 public:
4444 /* Default constructor. */
4445 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4447 /* Default destructor. */
4448 ~edge_clone_summary ()
4450 if (prev_clone)
4451 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4452 if (next_clone)
4453 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4456 cgraph_edge *prev_clone;
4457 cgraph_edge *next_clone;
4460 class edge_clone_summary_t:
4461 public call_summary <edge_clone_summary *>
4463 public:
4464 edge_clone_summary_t (symbol_table *symtab):
4465 call_summary <edge_clone_summary *> (symtab)
4467 m_initialize_when_cloning = true;
4470 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4471 edge_clone_summary *src_data,
4472 edge_clone_summary *dst_data) final override;
4475 /* Edge duplication hook. */
4477 void
4478 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4479 edge_clone_summary *src_data,
4480 edge_clone_summary *dst_data)
4482 if (src_data->next_clone)
4483 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4484 dst_data->prev_clone = src_edge;
4485 dst_data->next_clone = src_data->next_clone;
4486 src_data->next_clone = dst_edge;
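/* For illustration: if the clone list of an edge currently reads
   E1 <-> E2 and E1 is duplicated into D, the hook above splices D right
   after its source, yielding E1 <-> D <-> E2, so walks via
   get_next_cgraph_edge_clone still visit every clone exactly once. */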
4489 /* Return true if CS calls DEST or its clone for all contexts. When
4490 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4491 edges from/to an all-context clone. */
4493 static bool
4494 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4495 bool allow_recursion_to_clone)
4497 enum availability availability;
4498 cgraph_node *callee = cs->callee->function_symbol (&availability);
4500 if (availability <= AVAIL_INTERPOSABLE)
4501 return false;
4502 if (callee == dest)
4503 return true;
4504 if (!allow_recursion_to_clone && cs->caller == callee)
4505 return false;
4507 ipa_node_params *info = ipa_node_params_sum->get (callee);
4508 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4511 /* Return true if edge CS does bring about the value described by SRC to
4512 DEST_VAL of node DEST or its clone for all contexts. */
4514 static bool
4515 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4516 cgraph_node *dest, ipcp_value<tree> *dest_val)
4518 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4520 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4521 || caller_info->node_dead)
4522 return false;
4524 if (!src->val)
4525 return true;
4527 if (caller_info->ipcp_orig_node)
4529 tree t = NULL_TREE;
4530 if (src->offset == -1)
4531 t = caller_info->known_csts[src->index];
4532 else if (ipcp_transformation *ts
4533 = ipcp_get_transformation_summary (cs->caller))
4535 ipa_argagg_value_list avl (ts);
4536 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4538 return (t != NULL_TREE
4539 && values_equal_for_ipcp_p (src->val->value, t));
4541 else
4543 if (src->val == dest_val)
4544 return true;
4546 struct ipcp_agg_lattice *aglat;
4547 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4548 src->index);
4549 if (src->offset == -1)
4550 return (plats->itself.is_single_const ()
4551 && values_equal_for_ipcp_p (src->val->value,
4552 plats->itself.values->value));
4553 else
4555 if (plats->aggs_bottom || plats->aggs_contain_variable)
4556 return false;
4557 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4558 if (aglat->offset == src->offset)
4559 return (aglat->is_single_const ()
4560 && values_equal_for_ipcp_p (src->val->value,
4561 aglat->values->value));
4563 return false;
4567 /* Return true if edge CS does bring about the value described by SRC to
4568 DST_VAL of node DEST or its clone for all contexts. */
4570 static bool
4571 cgraph_edge_brings_value_p (cgraph_edge *cs,
4572 ipcp_value_source<ipa_polymorphic_call_context> *src,
4573 cgraph_node *dest,
4574 ipcp_value<ipa_polymorphic_call_context> *)
4576 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4578 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4579 || caller_info->node_dead)
4580 return false;
4581 if (!src->val)
4582 return true;
4584 if (caller_info->ipcp_orig_node)
4585 return (caller_info->known_contexts.length () > (unsigned) src->index)
4586 && values_equal_for_ipcp_p (src->val->value,
4587 caller_info->known_contexts[src->index]);
4589 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4590 src->index);
4591 return plats->ctxlat.is_single_const ()
4592 && values_equal_for_ipcp_p (src->val->value,
4593 plats->ctxlat.values->value);
4596 /* Get the next clone in the linked list of clones of an edge. */
4598 static inline struct cgraph_edge *
4599 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4601 edge_clone_summary *s = edge_clone_summaries->get (cs);
4602 return s != NULL ? s->next_clone : NULL;
4605 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4606 of them is viable and hot, return true. In that case, for those that still
4607 hold, add their edge frequencies, their number, and the cumulative profile
4608 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4609 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4611 template <typename valtype>
4612 static bool
4613 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4614 sreal *freq_sum, int *caller_count,
4615 profile_count *rec_count_sum,
4616 profile_count *nonrec_count_sum)
4618 ipcp_value_source<valtype> *src;
4619 sreal freq = 0;
4620 int count = 0;
4621 profile_count rec_cnt = profile_count::zero ();
4622 profile_count nonrec_cnt = profile_count::zero ();
4623 bool hot = false;
4624 bool non_self_recursive = false;
4626 for (src = val->sources; src; src = src->next)
4628 struct cgraph_edge *cs = src->cs;
4629 while (cs)
4631 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4633 count++;
4634 freq += cs->sreal_frequency ();
4635 hot |= cs->maybe_hot_p ();
4636 if (cs->caller != dest)
4638 non_self_recursive = true;
4639 if (cs->count.ipa ().initialized_p ())
4640 rec_cnt += cs->count.ipa ();
4642 else if (cs->count.ipa ().initialized_p ())
4643 nonrec_cnt += cs->count.ipa ();
4645 cs = get_next_cgraph_edge_clone (cs);
4649 /* If the only edges bringing a value are self-recursive ones, do not bother
4650 evaluating it. */
4651 if (!non_self_recursive)
4652 return false;
4654 *freq_sum = freq;
4655 *caller_count = count;
4656 *rec_count_sum = rec_cnt;
4657 *nonrec_count_sum = nonrec_cnt;
4659 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4661 struct cgraph_edge *cs;
4663 /* A cold non-SCC source edge could still trigger hot recursive execution
4664 of the function. Consider the case as hot and rely on the following cost
4665 model computation to further select the right one. */
4666 for (cs = dest->callers; cs; cs = cs->next_caller)
4667 if (cs->caller == dest && cs->maybe_hot_p ())
4668 return true;
4671 return hot;
4674 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4675 to let a non-self-recursive caller be the first element. Thus, we can
4676 simplify intersecting operations on values that arrive from all of these
4677 callers, especially when there exist self-recursive calls. Return true if
4678 this kind of adjustment is possible. */
4680 static bool
4681 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4682 cgraph_node *node)
4684 for (unsigned i = 0; i < callers.length (); i++)
4686 cgraph_edge *cs = callers[i];
4688 if (cs->caller != node)
4690 if (i > 0)
4692 callers[i] = callers[0];
4693 callers[0] = cs;
4695 return true;
4698 return false;
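/* For illustration (hypothetical callers): if CALLERS is {fn -> fn (a
   self-recursive edge), foo -> fn}, the function above swaps the two so that
   the edge from foo comes first and returns true; the intersection of values
   arriving over all callers can then start from an edge that brings values
   from outside the recursive cycle. */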
4701 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4702 is assumed their number is known and equal to CALLER_COUNT. */
4704 template <typename valtype>
4705 static vec<cgraph_edge *>
4706 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4707 int caller_count)
4709 ipcp_value_source<valtype> *src;
4710 vec<cgraph_edge *> ret;
4712 ret.create (caller_count);
4713 for (src = val->sources; src; src = src->next)
4715 struct cgraph_edge *cs = src->cs;
4716 while (cs)
4718 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4719 ret.quick_push (cs);
4720 cs = get_next_cgraph_edge_clone (cs);
4724 if (caller_count > 1)
4725 adjust_callers_for_value_intersection (ret, dest);
4727 return ret;
4730 /* Construct a replacement map for a known VALUE for formal parameter PARM_NUM.
4731 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4732 should be set to true when the reference created for the constant should be
4733 a load one and not an address one because the corresponding parameter p is
4734 only used as *p. */
4736 static struct ipa_replace_map *
4737 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4738 bool force_load_ref)
4740 struct ipa_replace_map *replace_map;
4742 replace_map = ggc_alloc<ipa_replace_map> ();
4743 if (dump_file)
4745 fprintf (dump_file, " replacing ");
4746 ipa_dump_param (dump_file, info, parm_num);
4748 fprintf (dump_file, " with const ");
4749 print_generic_expr (dump_file, value);
4751 if (force_load_ref)
4752 fprintf (dump_file, " - forcing load reference\n");
4753 else
4754 fprintf (dump_file, "\n");
4756 replace_map->parm_num = parm_num;
4757 replace_map->new_tree = value;
4758 replace_map->force_load_ref = force_load_ref;
4759 return replace_map;
4762 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4763 one, otherwise it will be referred to as the original node. */
4765 static void
4766 dump_profile_updates (cgraph_node *node, bool spec)
4768 if (spec)
4769 fprintf (dump_file, " setting count of the specialized node %s to ",
4770 node->dump_name ());
4771 else
4772 fprintf (dump_file, " setting count of the original node %s to ",
4773 node->dump_name ());
4775 node->count.dump (dump_file);
4776 fprintf (dump_file, "\n");
4777 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4779 fprintf (dump_file, " edge to %s has count ",
4780 cs->callee->dump_name ());
4781 cs->count.dump (dump_file);
4782 fprintf (dump_file, "\n");
4786 /* With a partial training run we do not want to assume that the original's
4787 count is zero whenever we redirect all executed edges to the clone; simply
4788 drop the profile to a local one in this case. In any case, return the new
4789 value. ORIG_NODE is the original node and its count has not been updated yet. */
4791 profile_count
4792 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4794 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4795 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4796 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4797 remainder = remainder.guessed_local ();
4799 return remainder;
4802 /* Structure to sum counts coming from nodes other than the original node and
4803 its clones. */
4805 struct gather_other_count_struct
4807 cgraph_node *orig;
4808 profile_count other_count;
4811 /* Worker callback of call_for_symbol_thunks_and_aliases summing the profile
4812 counts that come from non-self-recursive calls. */
4814 static bool
4815 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4817 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4818 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4819 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4820 desc->other_count += cs->count.ipa ();
4821 return false;
4824 /* Structure to help analyze if we need to boost counts of some clones of some
4825 non-recursive edges to match the new callee count. */
4827 struct desc_incoming_count_struct
4829 cgraph_node *orig;
4830 hash_set <cgraph_edge *> *processed_edges;
4831 profile_count count;
4832 unsigned unproc_orig_rec_edges;
4835 /* Go over edges calling NODE and its thunks and gather information about
4836 incoming counts so that we know if we need to make any adjustments. */
4838 static void
4839 analyze_clone_icoming_counts (cgraph_node *node,
4840 desc_incoming_count_struct *desc)
4842 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4843 if (cs->caller->thunk)
4845 analyze_clone_icoming_counts (cs->caller, desc);
4846 continue;
4848 else
4850 if (cs->count.initialized_p ())
4851 desc->count += cs->count.ipa ();
4852 if (!desc->processed_edges->contains (cs)
4853 && cs->caller->clone_of == desc->orig)
4854 desc->unproc_orig_rec_edges++;
4858 /* If caller edge counts of a clone created for a self-recursive arithmetic
4859 jump function must be adjusted because they come from the "seed" clone for
4860 the first value and so have been excessively scaled back as if they were not
4861 recursive calls, adjust them so that the incoming counts of NODE match its
4862 count. NODE is the node or its thunk. */
4864 static void
4865 adjust_clone_incoming_counts (cgraph_node *node,
4866 desc_incoming_count_struct *desc)
4868 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4869 if (cs->caller->thunk)
4871 adjust_clone_incoming_counts (cs->caller, desc);
4872 profile_count sum = profile_count::zero ();
4873 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4874 if (e->count.initialized_p ())
4875 sum += e->count.ipa ();
4876 cs->count = cs->count.combine_with_ipa_count (sum);
4878 else if (!desc->processed_edges->contains (cs)
4879 && cs->caller->clone_of == desc->orig)
4881 cs->count += desc->count;
4882 if (dump_file)
4884 fprintf (dump_file, " Adjusted count of an incoming edge of "
4885 "a clone %s -> %s to ", cs->caller->dump_name (),
4886 cs->callee->dump_name ());
4887 cs->count.dump (dump_file);
4888 fprintf (dump_file, "\n");
4893 /* When ORIG_NODE has been cloned for values which have been generated for a
4894 self-recursive call as a result of an arithmetic pass-through
4895 jump-functions, adjust its count together with counts of all such clones in
4896 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4898 The function sums the counts of the original node and all its clones that
4899 cannot be attributed to a specific clone because they come from
4900 non-recursive edges. This sum is then evenly divided between the clones and
4901 on top of that each one gets all the counts which can be attributed directly
4902 to it. */
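/* For illustration, with hypothetical counts (and ignoring the
   partial-training adjustment): if ORIG_NODE's IPA count is 900 and, of the
   counts flowing into the three nodes in SELF_GEN_CLONES, 60, 30 and 0 come
   over edges from outside the clone family and so can be attributed to them
   directly, each node is first given (900 - 90) / 3 = 270 counts and then its
   directly attributable portion, i.e. 330, 300 and 270 respectively. */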
4904 static void
4905 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4906 const vec<cgraph_node *> &self_gen_clones)
4908 profile_count redist_sum = orig_node->count.ipa ();
4909 if (!(redist_sum > profile_count::zero ()))
4910 return;
4912 if (dump_file)
4913 fprintf (dump_file, " Updating profile of self recursive clone "
4914 "series\n");
4916 gather_other_count_struct gocs;
4917 gocs.orig = orig_node;
4918 gocs.other_count = profile_count::zero ();
4920 auto_vec <profile_count, 8> other_edges_count;
4921 for (cgraph_node *n : self_gen_clones)
4923 gocs.other_count = profile_count::zero ();
4924 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4925 &gocs, false);
4926 other_edges_count.safe_push (gocs.other_count);
4927 redist_sum -= gocs.other_count;
4930 hash_set<cgraph_edge *> processed_edges;
4931 unsigned i = 0;
4932 for (cgraph_node *n : self_gen_clones)
4934 profile_count orig_count = n->count;
4935 profile_count new_count
4936 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4937 new_count = lenient_count_portion_handling (new_count, orig_node);
4938 n->count = new_count;
4939 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4940 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4942 cs->count = cs->count.apply_scale (new_count, orig_count);
4943 processed_edges.add (cs);
4945 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4946 cs->count = cs->count.apply_scale (new_count, orig_count);
4948 i++;
4951 /* There are still going to be edges to ORIG_NODE that have one or more
4952 clones coming from another node clone in SELF_GEN_CLONES and which we
4953 scaled by the same amount, which means that the total incoming sum of
4954 counts to ORIG_NODE will be too high; scale such edges back. */
4955 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4957 if (cs->callee->ultimate_alias_target () == orig_node)
4959 unsigned den = 0;
4960 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4961 if (e->callee->ultimate_alias_target () == orig_node
4962 && processed_edges.contains (e))
4963 den++;
4964 if (den > 0)
4965 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4966 if (e->callee->ultimate_alias_target () == orig_node
4967 && processed_edges.contains (e))
4968 e->count /= den;
4972 /* Edges from the seeds of the values generated for arithmetic jump-functions
4973 along self-recursive edges are likely to have fairly low counts, and because
4974 edges from them to nodes in the self_gen_clones do not correspond to the
4975 artificially distributed count of the nodes, the total sum of incoming
4976 edges to some clones might be too low. Detect this situation and correct
4977 it. */
4978 for (cgraph_node *n : self_gen_clones)
4980 if (!(n->count.ipa () > profile_count::zero ()))
4981 continue;
4983 desc_incoming_count_struct desc;
4984 desc.orig = orig_node;
4985 desc.processed_edges = &processed_edges;
4986 desc.count = profile_count::zero ();
4987 desc.unproc_orig_rec_edges = 0;
4988 analyze_clone_icoming_counts (n, &desc);
4990 if (n->count.differs_from_p (desc.count))
4992 if (n->count > desc.count
4993 && desc.unproc_orig_rec_edges > 0)
4995 desc.count = n->count - desc.count;
4996 desc.count /= desc.unproc_orig_rec_edges;
4997 adjust_clone_incoming_counts (n, &desc);
4999 else if (dump_file)
5000 fprintf (dump_file,
5001 " Unable to fix up incoming counts for %s.\n",
5002 n->dump_name ());
5006 if (dump_file)
5007 for (cgraph_node *n : self_gen_clones)
5008 dump_profile_updates (n, n != orig_node);
5009 return;
5012 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
5013 their profile information to reflect this. This function should not be used
5014 for clones generated for arithmetic pass-through jump functions on a
5015 self-recursive call graph edge, that situation is handled by
5016 update_counts_for_self_gen_clones. */
5018 static void
5019 update_profiling_info (struct cgraph_node *orig_node,
5020 struct cgraph_node *new_node)
5022 struct caller_statistics stats;
5023 profile_count new_sum;
5024 profile_count remainder, orig_node_count = orig_node->count.ipa ();
5026 if (!(orig_node_count > profile_count::zero ()))
5027 return;
5029 if (dump_file)
5031 fprintf (dump_file, " Updating profile from original count: ");
5032 orig_node_count.dump (dump_file);
5033 fprintf (dump_file, "\n");
5036 init_caller_stats (&stats, new_node);
5037 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
5038 false);
5039 new_sum = stats.count_sum;
5041 bool orig_edges_processed = false;
5042 if (new_sum > orig_node_count)
5044 /* TODO: Profile has already gone astray, keep what we have but lower it
5045 to global0 category. */
5046 remainder = orig_node->count.global0 ();
5048 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5049 cs->count = cs->count.global0 ();
5050 for (cgraph_edge *cs = orig_node->indirect_calls;
5052 cs = cs->next_callee)
5053 cs->count = cs->count.global0 ();
5054 orig_edges_processed = true;
5056 else if (stats.rec_count_sum.nonzero_p ())
5058 int new_nonrec_calls = stats.n_nonrec_calls;
5059 /* There are self-recursive edges which are likely to bring in the
5060 majority of calls but which we must divide between the original and
5061 new node. */
5062 init_caller_stats (&stats, orig_node);
5063 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
5064 &stats, false);
5065 int orig_nonrec_calls = stats.n_nonrec_calls;
5066 profile_count orig_nonrec_call_count = stats.count_sum;
5068 if (orig_node->local)
5070 if (!orig_nonrec_call_count.nonzero_p ())
5072 if (dump_file)
5073 fprintf (dump_file, " The original is local and the only "
5074 "incoming edges from non-dead callers with nonzero "
5075 "counts are self-recursive, assuming it is cold.\n");
5076 /* The NEW_NODE count and counts of all its outgoing edges
5077 are still unmodified copies of ORIG_NODE's. Just clear
5078 the latter and bail out. */
5079 profile_count zero;
5080 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5081 zero = profile_count::zero ().guessed_local ();
5082 else
5083 zero = profile_count::adjusted_zero ();
5084 orig_node->count = zero;
5085 for (cgraph_edge *cs = orig_node->callees;
5087 cs = cs->next_callee)
5088 cs->count = zero;
5089 for (cgraph_edge *cs = orig_node->indirect_calls;
5091 cs = cs->next_callee)
5092 cs->count = zero;
5093 return;
5096 else
5098 /* Let's behave as if there was another caller that accounts for all
5099 the calls that were either indirect or from other compilation
5100 units. */
5101 orig_nonrec_calls++;
5102 profile_count pretend_caller_count
5103 = (orig_node_count - new_sum - orig_nonrec_call_count
5104 - stats.rec_count_sum);
5105 orig_nonrec_call_count += pretend_caller_count;
5108 /* Divide all "unexplained" counts roughly proportionally to sums of
5109 counts of non-recursive calls.
5111 We put rather arbitrary limits on how many counts we claim because the
5112 sum of non-self-recursive incoming counts is only a rough guideline
5113 and there are cases (such as mcf) where using it blindly just takes
5114 too many. And if lattices are considered in the opposite order we
5115 could also take too few. */
5116 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5118 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5119 profile_count new_part
5120 = MAX(MIN (unexp.apply_scale (new_sum,
5121 new_sum + orig_nonrec_call_count),
5122 unexp.apply_scale (limit_den - 1, limit_den)),
5123 unexp.apply_scale (new_nonrec_calls, limit_den));
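/* For illustration, with hypothetical counts: if unexp is 1000, new_sum is
   300, orig_nonrec_call_count is 100 and there are 2 + 2 = 4 non-recursive
   calls in total (so limit_den is 8), the proportional share is
   1000 * 300 / 400 = 750, the upper clamp is 1000 * 7 / 8 = 875 and the
   lower clamp is 1000 * 2 / 8 = 250, so new_part becomes 750. */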
5124 if (dump_file)
5126 fprintf (dump_file, " Claiming ");
5127 new_part.dump (dump_file);
5128 fprintf (dump_file, " of unexplained ");
5129 unexp.dump (dump_file);
5130 fprintf (dump_file, " counts because of self-recursive "
5131 "calls\n");
5133 new_sum += new_part;
5134 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5135 orig_node);
5137 else
5138 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5139 orig_node);
5141 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5142 new_node->count = new_sum;
5143 orig_node->count = remainder;
5145 profile_count orig_new_node_count = orig_node_count;
5146 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5147 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5148 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5149 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5150 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5152 if (!orig_edges_processed)
5154 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5155 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5156 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5157 for (cgraph_edge *cs = orig_node->indirect_calls;
5159 cs = cs->next_callee)
5160 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5163 if (dump_file)
5165 dump_profile_updates (new_node, true);
5166 dump_profile_updates (orig_node, false);
5170 /* Update the respective profile of specialized NEW_NODE and the original
5171 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5172 have been redirected to the specialized version. */
5174 static void
5175 update_specialized_profile (struct cgraph_node *new_node,
5176 struct cgraph_node *orig_node,
5177 profile_count redirected_sum)
5179 struct cgraph_edge *cs;
5180 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
5182 if (dump_file)
5184 fprintf (dump_file, " the sum of counts of redirected edges is ");
5185 redirected_sum.dump (dump_file);
5186 fprintf (dump_file, "\n old ipa count of the original node is ");
5187 orig_node_count.dump (dump_file);
5188 fprintf (dump_file, "\n");
5190 if (!(orig_node_count > profile_count::zero ()))
5191 return;
5193 new_node_count = new_node->count;
5194 new_node->count += redirected_sum;
5195 orig_node->count
5196 = lenient_count_portion_handling (orig_node->count - redirected_sum,
5197 orig_node);
5199 for (cs = new_node->callees; cs; cs = cs->next_callee)
5200 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5202 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5204 profile_count dec = cs->count.apply_scale (redirected_sum,
5205 orig_node_count);
5206 cs->count -= dec;
5209 if (dump_file)
5211 dump_profile_updates (new_node, true);
5212 dump_profile_updates (orig_node, false);
5216 static void adjust_references_in_caller (cgraph_edge *cs,
5217 symtab_node *symbol, int index);
5219 /* Simple structure to pass a symbol and index (with same meaning as parameters
5220 of adjust_references_in_caller) through a void* parameter of a
5221 call_for_symbol_thunks_and_aliases callback. */
5222 struct symbol_and_index_together
5224 symtab_node *symbol;
5225 int index;
5228 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5229 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5230 static bool
5231 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5233 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5234 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5235 if (!cs->caller->thunk)
5236 adjust_references_in_caller (cs, pack->symbol, pack->index);
5237 return false;
5240 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5241 variable which is only dereferenced and which is represented by SYMBOL. See
5242 if we can remove the ADDR reference in callers associated with the call. */
5244 static void
5245 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5247 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5248 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5249 if (jfunc->type == IPA_JF_CONST)
5251 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5252 cs->lto_stmt_uid,
5253 IPA_REF_ADDR);
5254 if (!to_del)
5255 return;
5256 to_del->remove_reference ();
5257 ipa_zap_jf_refdesc (jfunc);
5258 if (dump_file)
5259 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5260 cs->caller->dump_name (), symbol->dump_name ());
5261 return;
5264 if (jfunc->type != IPA_JF_PASS_THROUGH
5265 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
5266 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
5267 return;
5269 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5270 cgraph_node *caller = cs->caller;
5271 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5272 /* TODO: This consistency check may be too big and not really
5273 that useful. Consider removing it. */
5274 tree cst;
5275 if (caller_info->ipcp_orig_node)
5276 cst = caller_info->known_csts[fidx];
5277 else
5279 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5280 gcc_assert (lat->is_single_const ());
5281 cst = lat->values->value;
5283 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5284 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5285 == symbol));
5287 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5288 if (cuses == IPA_UNDESCRIBED_USE)
5289 return;
5290 gcc_assert (cuses > 0);
5291 cuses--;
5292 ipa_set_controlled_uses (caller_info, fidx, cuses);
5293 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5294 if (dump_file && (dump_flags & TDF_DETAILS))
5295 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5296 "to %i.\n", fidx, caller->dump_name (), cuses);
5297 if (cuses)
5298 return;
5300 if (caller_info->ipcp_orig_node)
5302 /* Cloning machinery has created a reference here, we need to either
5303 remove it or change it to a read one. */
5304 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5305 if (to_del)
5307 to_del->remove_reference ();
5308 if (dump_file)
5309 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5310 cs->caller->dump_name (), symbol->dump_name ());
5311 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5313 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5314 if (dump_file)
5315 fprintf (dump_file,
5316 " ...and replaced it with LOAD one.\n");
5321 symbol_and_index_together pack;
5322 pack.symbol = symbol;
5323 pack.index = fidx;
5324 if (caller->can_change_signature)
5325 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5326 &pack, true);
5330 /* Return true if we would like to remove a parameter from NODE when cloning it
5331 with KNOWN_CSTS scalar constants. */
5333 static bool
5334 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5336 auto_vec<bool, 16> surviving;
5337 bool filled_vec = false;
5338 ipa_node_params *info = ipa_node_params_sum->get (node);
5339 int i, count = ipa_get_param_count (info);
5341 for (i = 0; i < count; i++)
5343 if (!known_csts[i] && ipa_is_param_used (info, i))
5344 continue;
5346 if (!filled_vec)
5348 clone_info *info = clone_info::get (node);
5349 if (!info || !info->param_adjustments)
5350 return true;
5351 info->param_adjustments->get_surviving_params (&surviving);
5352 filled_vec = true;
5354 if (surviving.length () > (unsigned) i && surviving[i])
5355 return true;
5357 return false;
5360 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5361 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5362 redirect all edges in CALLERS to it. */
5364 static struct cgraph_node *
5365 create_specialized_node (struct cgraph_node *node,
5366 vec<tree> known_csts,
5367 vec<ipa_polymorphic_call_context> known_contexts,
5368 vec<ipa_argagg_value, va_gc> *aggvals,
5369 vec<cgraph_edge *> &callers)
5371 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5372 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5373 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5374 struct cgraph_node *new_node;
5375 int i, count = ipa_get_param_count (info);
5376 clone_info *cinfo = clone_info::get (node);
5377 ipa_param_adjustments *old_adjustments = cinfo
5378 ? cinfo->param_adjustments : NULL;
5379 ipa_param_adjustments *new_adjustments;
5380 gcc_assert (!info->ipcp_orig_node);
5381 gcc_assert (node->can_change_signature
5382 || !old_adjustments);
5384 if (old_adjustments)
5386 /* At the moment all IPA optimizations should use the number of
5387 parameters of the prevailing decl as the m_always_copy_start.
5388 Handling any other value would complicate the code below, so for the
5389 time being let's only assert it is so. */
5390 gcc_assert (old_adjustments->m_always_copy_start == count
5391 || old_adjustments->m_always_copy_start < 0);
5392 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5393 for (i = 0; i < old_adj_count; i++)
5395 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5396 if (!node->can_change_signature
5397 || old_adj->op != IPA_PARAM_OP_COPY
5398 || (!known_csts[old_adj->base_index]
5399 && ipa_is_param_used (info, old_adj->base_index)))
5401 ipa_adjusted_param new_adj = *old_adj;
5403 new_adj.prev_clone_adjustment = true;
5404 new_adj.prev_clone_index = i;
5405 vec_safe_push (new_params, new_adj);
5408 bool skip_return = old_adjustments->m_skip_return;
5409 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5410 ipa_param_adjustments (new_params, count,
5411 skip_return));
5413 else if (node->can_change_signature
5414 && want_remove_some_param_p (node, known_csts))
5416 ipa_adjusted_param adj;
5417 memset (&adj, 0, sizeof (adj));
5418 adj.op = IPA_PARAM_OP_COPY;
5419 for (i = 0; i < count; i++)
5420 if (!known_csts[i] && ipa_is_param_used (info, i))
5422 adj.base_index = i;
5423 adj.prev_clone_index = i;
5424 vec_safe_push (new_params, adj);
5426 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5427 ipa_param_adjustments (new_params, count, false));
5429 else
5430 new_adjustments = NULL;
5432 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5433 for (i = callers.length () - 1; i >= 0; i--)
5435 cgraph_edge *cs = callers[i];
5436 if (cs->caller == node)
5438 self_recursive_calls.safe_push (cs);
5439 callers.unordered_remove (i);
5442 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5443 for (i = 0; i < count; i++)
5445 tree t = known_csts[i];
5446 if (!t)
5447 continue;
5449 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5451 bool load_ref = false;
5452 symtab_node *ref_symbol;
5453 if (TREE_CODE (t) == ADDR_EXPR)
5455 tree base = get_base_address (TREE_OPERAND (t, 0));
5456 if (TREE_CODE (base) == VAR_DECL
5457 && ipa_get_controlled_uses (info, i) == 0
5458 && ipa_get_param_load_dereferenced (info, i)
5459 && (ref_symbol = symtab_node::get (base)))
5461 load_ref = true;
5462 if (node->can_change_signature)
5463 for (cgraph_edge *caller : callers)
5464 adjust_references_in_caller (caller, ref_symbol, i);
5468 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5469 if (replace_map)
5470 vec_safe_push (replace_trees, replace_map);
5473 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5474 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5475 node->decl)));
5476 new_node = node->create_virtual_clone (callers, replace_trees,
5477 new_adjustments, "constprop",
5478 suffix_counter);
5479 suffix_counter++;
5481 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5482 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5484 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5485 /* Cloned edges can disappear during cloning as speculation can be
5486 resolved, check that we have one and that it comes from the last
5487 cloning. */
5488 if (cs && cs->caller == new_node)
5489 cs->redirect_callee_duplicating_thunks (new_node);
5490 /* Any future code that would make more than one clone of an outgoing
5491 edge would confuse this mechanism, so let's check that this does not
5492 happen. */
5493 gcc_checking_assert (!cs
5494 || !get_next_cgraph_edge_clone (cs)
5495 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5497 if (have_self_recursive_calls)
5498 new_node->expand_all_artificial_thunks ();
5500 ipa_set_node_agg_value_chain (new_node, aggvals);
5501 for (const ipa_argagg_value &av : aggvals)
5502 new_node->maybe_create_reference (av.value, NULL);
5504 if (dump_file && (dump_flags & TDF_DETAILS))
5506 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5507 if (known_contexts.exists ())
5509 for (i = 0; i < count; i++)
5510 if (!known_contexts[i].useless_p ())
5512 fprintf (dump_file, " known ctx %i is ", i);
5513 known_contexts[i].dump (dump_file);
5516 if (aggvals)
5518 fprintf (dump_file, " Aggregate replacements:");
5519 ipa_argagg_value_list avs (aggvals);
5520 avs.dump (dump_file);
5524 new_info = ipa_node_params_sum->get (new_node);
5525 new_info->ipcp_orig_node = node;
5526 new_node->ipcp_clone = true;
5527 new_info->known_csts = known_csts;
5528 new_info->known_contexts = known_contexts;
5530 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5531 aggvals);
5533 return new_node;
5536 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5537 pass-through function to itself when the cgraph_node involved is not an
5538 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5539 no-operation pass-through. */
5541 static bool
5542 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5543 bool simple = true)
5545 enum availability availability;
5546 if (cs->caller == cs->callee->function_symbol (&availability)
5547 && availability > AVAIL_INTERPOSABLE
5548 && jfunc->type == IPA_JF_PASS_THROUGH
5549 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5550 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5551 && ipa_node_params_sum->get (cs->caller)
5552 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5553 return true;
5554 return false;
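/* For illustration (hypothetical source): in

   fn (int x)
   {
     ...
     fn (x);
   }

   the jump function describing the argument of the recursive call is a simple
   NOP_EXPR pass-through of formal parameter 0, so the predicate above returns
   true for parameter index 0, provided fn itself is not an IPA-CP clone. */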
5557 /* Return true if JFUNC, which describes a part of an aggregate represented or
5558 pointed to by the i-th parameter of call CS, is a pass-through function to
5559 itself when the cgraph_node involved is not an IPA-CP clone. When
5560 SIMPLE is true, further check if JFUNC is a simple no-operation
5561 pass-through. */
5563 static bool
5564 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5565 const ipa_agg_jf_item *jfunc,
5566 int i, bool simple = true)
5568 enum availability availability;
5569 if (cs->caller == cs->callee->function_symbol (&availability)
5570 && availability > AVAIL_INTERPOSABLE
5571 && jfunc->jftype == IPA_JF_LOAD_AGG
5572 && jfunc->offset == jfunc->value.load_agg.offset
5573 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5574 && jfunc->value.pass_through.formal_id == i
5575 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5576 && ipa_node_params_sum->get (cs->caller)
5577 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5578 return true;
5579 return false;
5582 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5583 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
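/* An illustrative, hypothetical situation this function handles:
   propagation alone could not establish a constant for parameter X because
   the function has other callers, but all edges in the considered subset
   agree on the value:

     static int fn (int x) { return x * 2; }
     void a (void) { fn (7); }
     void b (void) { fn (7); }

   If the edges from a and b form CALLERS, the value 7 can be filled into
   KNOWN_CSTS for X even though it is not a context-independent constant.  */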
5585 static void
5586 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5587 vec<tree> &known_csts,
5588 const vec<cgraph_edge *> &callers)
5590 ipa_node_params *info = ipa_node_params_sum->get (node);
5591 int i, count = ipa_get_param_count (info);
5593 for (i = 0; i < count; i++)
5595 struct cgraph_edge *cs;
5596 tree newval = NULL_TREE;
5597 int j;
5598 bool first = true;
5599 tree type = ipa_get_type (info, i);
5601 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5602 continue;
5604 FOR_EACH_VEC_ELT (callers, j, cs)
5606 struct ipa_jump_func *jump_func;
5607 tree t;
5609 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5610 if (!args
5611 || i >= ipa_get_cs_argument_count (args)
5612 || (i == 0
5613 && call_passes_through_thunk (cs)))
5615 newval = NULL_TREE;
5616 break;
5618 jump_func = ipa_get_ith_jump_func (args, i);
5620 /* Besides a simple pass-through jump function, an arithmetic jump
5621 function could also introduce an argument-direct-pass-through for a
5622 self-feeding recursive call. For example,
5624 fn (int i)
5625 {
5626 fn (i & 1);
5627 }
5629 Given that i is 0, recursive propagation via (i & 1) also gets
5630 0. */
5631 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5633 gcc_assert (newval);
5634 t = ipa_get_jf_arith_result (
5635 ipa_get_jf_pass_through_operation (jump_func),
5636 newval,
5637 ipa_get_jf_pass_through_operand (jump_func),
5638 type);
5640 else
5641 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5642 jump_func, type);
5643 if (!t
5644 || (newval
5645 && !values_equal_for_ipcp_p (t, newval))
5646 || (!first && !newval))
5648 newval = NULL_TREE;
5649 break;
5651 else
5652 newval = t;
5653 first = false;
5656 if (newval)
5658 if (dump_file && (dump_flags & TDF_DETAILS))
5660 fprintf (dump_file, " adding an extra known scalar value ");
5661 print_ipcp_constant_value (dump_file, newval);
5662 fprintf (dump_file, " for ");
5663 ipa_dump_param (dump_file, info, i);
5664 fprintf (dump_file, "\n");
5667 known_csts[i] = newval;
5672 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5673 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5674 CALLERS. */
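/* A hypothetical illustration: if every caller in the subset passes an
   object of the same dynamic type,

     void a (void) { S s; fn (&s); }
     void b (void) { S s; fn (&s); }

   then the meet of the contexts from both edges still says the argument
   points to an object of type S, and that context is recorded in
   KNOWN_CONTEXTS.  */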
5676 static void
5677 find_more_contexts_for_caller_subset (cgraph_node *node,
5678 vec<ipa_polymorphic_call_context>
5679 *known_contexts,
5680 const vec<cgraph_edge *> &callers)
5682 ipa_node_params *info = ipa_node_params_sum->get (node);
5683 int i, count = ipa_get_param_count (info);
5685 for (i = 0; i < count; i++)
5687 cgraph_edge *cs;
5689 if (ipa_get_poly_ctx_lat (info, i)->bottom
5690 || (known_contexts->exists ()
5691 && !(*known_contexts)[i].useless_p ()))
5692 continue;
5694 ipa_polymorphic_call_context newval;
5695 bool first = true;
5696 int j;
5698 FOR_EACH_VEC_ELT (callers, j, cs)
5700 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5701 if (!args
5702 || i >= ipa_get_cs_argument_count (args))
5703 return;
5704 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5705 ipa_polymorphic_call_context ctx;
5706 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5707 cs, i, jfunc);
5708 if (first)
5710 newval = ctx;
5711 first = false;
5713 else
5714 newval.meet_with (ctx);
5715 if (newval.useless_p ())
5716 break;
5719 if (!newval.useless_p ())
5721 if (dump_file && (dump_flags & TDF_DETAILS))
5723 fprintf (dump_file, " adding an extra known polymorphic "
5724 "context ");
5725 print_ipcp_constant_value (dump_file, newval);
5726 fprintf (dump_file, " for ");
5727 ipa_dump_param (dump_file, info, i);
5728 fprintf (dump_file, "\n");
5731 if (!known_contexts->exists ())
5732 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5733 true);
5734 (*known_contexts)[i] = newval;
5740 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5741 RES. If INTERIM is non-NULL, it contains the current interim state of
5742 collected aggregate values which can be used to compute values passed over
5743 self-recursive edges.
5745 This is basically one iteration of push_agg_values_from_edge over one
5746 parameter, which allows for simpler early returns. */
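/* For example (hypothetical), for a call site such as

     struct S s;
     s.a = 1;
     s.b = 2;
     fn (&s);

   this pushes the values 1 and 2 together with their unit offsets within S
   for parameter INDEX, provided the jump function of CS describes them.  */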
5748 static void
5749 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5750 vec<ipa_argagg_value> *res,
5751 const ipa_argagg_value_list *interim)
5753 bool agg_values_from_caller = false;
5754 bool agg_jf_preserved = false;
5755 unsigned unit_delta = UINT_MAX;
5756 int src_idx = -1;
5757 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5758 index);
5760 if (jfunc->type == IPA_JF_PASS_THROUGH
5761 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5763 agg_values_from_caller = true;
5764 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5765 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5766 unit_delta = 0;
5768 else if (jfunc->type == IPA_JF_ANCESTOR
5769 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5771 agg_values_from_caller = true;
5772 agg_jf_preserved = true;
5773 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5774 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5777 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5778 if (agg_values_from_caller)
5780 if (caller_info->ipcp_orig_node)
5782 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5783 ipcp_transformation *ts
5784 = ipcp_get_transformation_summary (cs->caller);
5785 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5786 ipcp_param_lattices *orig_plats
5787 = ipa_get_parm_lattices (orig_info, src_idx);
5788 if (ts
5789 && orig_plats->aggs
5790 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5792 ipa_argagg_value_list src (ts);
5793 src.push_adjusted_values (src_idx, index, unit_delta, res);
5794 return;
5797 else
5799 ipcp_param_lattices *src_plats
5800 = ipa_get_parm_lattices (caller_info, src_idx);
5801 if (src_plats->aggs
5802 && !src_plats->aggs_bottom
5803 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5805 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5807 interim->push_adjusted_values (src_idx, index, unit_delta,
5808 res);
5809 return;
5811 if (!src_plats->aggs_contain_variable)
5813 push_agg_values_from_plats (src_plats, index, unit_delta,
5814 res);
5815 return;
5821 if (!jfunc->agg.items)
5822 return;
5823 bool first = true;
5824 unsigned prev_unit_offset = 0;
5825 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5827 tree value, srcvalue;
5828 /* Besides a simple pass-through aggregate jump function, an arithmetic
5829 aggregate jump function could also bring the same aggregate value as the
5830 parameter passed in for a self-feeding recursive call. For example,
5832 fn (int *i)
5833 {
5834 int j = *i & 1;
5835 fn (&j);
5836 }
5838 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5839 if (interim
5840 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5841 && (srcvalue = interim->get_value (index,
5842 agg_jf.offset / BITS_PER_UNIT)))
5843 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5844 srcvalue,
5845 agg_jf.value.pass_through.operand,
5846 agg_jf.type);
5847 else
5848 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5849 &agg_jf);
5850 if (value)
5852 struct ipa_argagg_value iav;
5853 iav.value = value;
5854 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5855 iav.index = index;
5856 iav.by_ref = jfunc->agg.by_ref;
5857 iav.killed = false;
5859 gcc_assert (first
5860 || iav.unit_offset > prev_unit_offset);
5861 prev_unit_offset = iav.unit_offset;
5862 first = false;
5864 res->safe_push (iav);
5867 return;
5870 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5871 description of the ultimate callee of CS or the one it was cloned from (the
5872 summary where the lattices are). If INTERIM is non-NULL, it contains the
5873 current interim state of collected aggregate values which can be used to
5874 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5875 is true) and to skip values which clearly will not be part of the
5876 intersection with INTERIM. */
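/* For instance (hypothetical), if INTERIM only contains entries for
   parameter 1, any values this edge would yield for parameter 2 are not
   even computed, because they could not survive the later intersection
   anyway.  */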
5878 static void
5879 push_agg_values_from_edge (struct cgraph_edge *cs,
5880 ipa_node_params *dest_info,
5881 vec<ipa_argagg_value> *res,
5882 const ipa_argagg_value_list *interim,
5883 bool optimize_self_recursion)
5885 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5886 if (!args)
5887 return;
5889 int count = MIN (ipa_get_param_count (dest_info),
5890 ipa_get_cs_argument_count (args));
5892 unsigned interim_index = 0;
5893 for (int index = 0; index < count; index++)
5895 if (interim)
5897 while (interim_index < interim->m_elts.size ()
5898 && interim->m_elts[interim_index].value
5899 && interim->m_elts[interim_index].index < index)
5900 interim_index++;
5901 if (interim_index >= interim->m_elts.size ()
5902 || interim->m_elts[interim_index].index > index)
5903 continue;
5906 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5907 if (!ipa_is_param_used (dest_info, index)
5908 || plats->aggs_bottom)
5909 continue;
5910 push_agg_values_for_index_from_edge (cs, index, res,
5911 optimize_self_recursion ? interim
5912 : NULL);
5917 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5918 from all of them. Return nullptr if there are none. */
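/* A hypothetical illustration of the intersection performed below:

     void a (void) { struct S s = { 1, 2 }; fn (&s); }
     void b (void) { struct S s = { 1, 3 }; fn (&s); }

   Only the value 1 at the unit offset of the first field is brought by both
   callers, so only that entry survives in the returned vector.  */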
5920 static struct vec<ipa_argagg_value, va_gc> *
5921 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5922 const vec<cgraph_edge *> &callers)
5924 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5925 if (dest_info->ipcp_orig_node)
5926 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5928 /* gather_edges_for_value puts a non-recursive call into the first element of
5929 callers if it can. */
5930 auto_vec<ipa_argagg_value, 32> interim;
5931 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5933 unsigned valid_entries = interim.length ();
5934 if (!valid_entries)
5935 return nullptr;
5937 unsigned caller_count = callers.length ();
5938 for (unsigned i = 1; i < caller_count; i++)
5940 auto_vec<ipa_argagg_value, 32> last;
5941 ipa_argagg_value_list avs (&interim);
5942 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5944 valid_entries = intersect_argaggs_with (interim, last);
5945 if (!valid_entries)
5946 return nullptr;
5949 vec<ipa_argagg_value, va_gc> *res = NULL;
5950 vec_safe_reserve_exact (res, valid_entries);
5951 for (const ipa_argagg_value &av : interim)
5952 if (av.value)
5953 res->quick_push (av);
5954 gcc_checking_assert (res->length () == valid_entries);
5955 return res;
5958 /* Determine whether CS also brings all scalar values that NODE is
5959 specialized for. */
5961 static bool
5962 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5963 struct cgraph_node *node)
5965 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5966 int count = ipa_get_param_count (dest_info);
5967 class ipa_node_params *caller_info;
5968 class ipa_edge_args *args;
5969 int i;
5971 caller_info = ipa_node_params_sum->get (cs->caller);
5972 args = ipa_edge_args_sum->get (cs);
5973 for (i = 0; i < count; i++)
5975 struct ipa_jump_func *jump_func;
5976 tree val, t;
5978 val = dest_info->known_csts[i];
5979 if (!val)
5980 continue;
5982 if (i >= ipa_get_cs_argument_count (args))
5983 return false;
5984 jump_func = ipa_get_ith_jump_func (args, i);
5985 t = ipa_value_from_jfunc (caller_info, jump_func,
5986 ipa_get_type (dest_info, i));
5987 if (!t || !values_equal_for_ipcp_p (val, t))
5988 return false;
5990 return true;
5993 /* Determine whether CS also brings all aggregate values that NODE is
5994 specialized for. */
5996 static bool
5997 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5998 struct cgraph_node *node)
6000 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
6001 if (!ts || vec_safe_is_empty (ts->m_agg_values))
6002 return true;
6004 const ipa_argagg_value_list existing (ts->m_agg_values);
6005 auto_vec<ipa_argagg_value, 32> edge_values;
6006 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
6007 gcc_checking_assert (dest_info->ipcp_orig_node);
6008 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
6009 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
6010 const ipa_argagg_value_list avl (&edge_values);
6011 return avl.superset_of_p (existing);
6014 /* Given an original NODE and a VAL for which we have already created a
6015 specialized clone, look whether there are incoming edges that still lead
6016 into the old node but now also bring the requested value and conform to all
6017 other criteria such that they can be redirected to the specialized node.
6018 This function can therefore redirect the final edge in an SCC. */
6020 template <typename valtype>
6021 static void
6022 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
6024 ipcp_value_source<valtype> *src;
6025 profile_count redirected_sum = profile_count::zero ();
6027 for (src = val->sources; src; src = src->next)
6029 struct cgraph_edge *cs = src->cs;
6030 while (cs)
6032 if (cgraph_edge_brings_value_p (cs, src, node, val)
6033 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
6034 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
6036 if (dump_file)
6037 fprintf (dump_file, " - adding an extra caller %s of %s\n",
6038 cs->caller->dump_name (),
6039 val->spec_node->dump_name ());
6041 cs->redirect_callee_duplicating_thunks (val->spec_node);
6042 val->spec_node->expand_all_artificial_thunks ();
6043 if (cs->count.ipa ().initialized_p ())
6044 redirected_sum = redirected_sum + cs->count.ipa ();
6046 cs = get_next_cgraph_edge_clone (cs);
6050 if (redirected_sum.nonzero_p ())
6051 update_specialized_profile (val->spec_node, node, redirected_sum);
6054 /* Return true if KNOWN_CONTEXTS contains at least one useful context. */
6056 static bool
6057 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
6059 ipa_polymorphic_call_context *ctx;
6060 int i;
6062 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
6063 if (!ctx->useless_p ())
6064 return true;
6065 return false;
6068 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */
6070 static vec<ipa_polymorphic_call_context>
6071 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
6073 if (known_contexts_useful_p (known_contexts))
6074 return known_contexts.copy ();
6075 else
6076 return vNULL;
6079 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6080 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
6081 copy too. */
6083 static void
6084 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6085 vec<tree> *known_csts,
6086 vec<ipa_polymorphic_call_context> *known_contexts,
6087 ipcp_value<tree> *val, int index)
6089 *known_csts = avals->m_known_vals.copy ();
6090 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6091 (*known_csts)[index] = val->value;
6094 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6095 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6096 INDEX. */
6098 static void
6099 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6100 vec<tree> *known_csts,
6101 vec<ipa_polymorphic_call_context> *known_contexts,
6102 ipcp_value<ipa_polymorphic_call_context> *val,
6103 int index)
6105 *known_csts = avals->m_known_vals.copy ();
6106 *known_contexts = avals->m_known_contexts.copy ();
6107 (*known_contexts)[index] = val->value;
6110 /* Return true if OFFSET indicates this was not an aggregate value or there is
6111 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6112 AGGVALS list. */
6114 DEBUG_FUNCTION bool
6115 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6116 int index, HOST_WIDE_INT offset, tree value)
6118 if (offset == -1)
6119 return true;
6121 const ipa_argagg_value_list avl (aggvals);
6122 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6123 return v && values_equal_for_ipcp_p (v, value);
6126 /* Return true if OFFSET is minus one because the source of a polymorphic
6127 context cannot be an aggregate value. */
6129 DEBUG_FUNCTION bool
6130 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6131 int , HOST_WIDE_INT offset,
6132 ipa_polymorphic_call_context)
6134 return offset == -1;
6137 /* Decide whether to create a special version of NODE for value VAL of
6138 parameter at the given INDEX. If OFFSET is -1, the value is for the
6139 parameter itself, otherwise it is stored at the given OFFSET of the
6140 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6141 is a vector which contains clones created for self-recursive calls with an
6142 arithmetic pass-through jump function. */
6144 template <typename valtype>
6145 static bool
6146 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6147 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6148 vec<cgraph_node *> *self_gen_clones)
6150 int caller_count;
6151 sreal freq_sum;
6152 profile_count count_sum, rec_count_sum;
6153 vec<cgraph_edge *> callers;
6155 if (val->spec_node)
6157 perhaps_add_new_callers (node, val);
6158 return false;
6160 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6162 if (dump_file && (dump_flags & TDF_DETAILS))
6163 fprintf (dump_file, " Ignoring candidate value because "
6164 "maximum unit size would be reached with %li.\n",
6165 val->local_size_cost + overall_size);
6166 return false;
6168 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6169 &rec_count_sum, &count_sum))
6170 return false;
6172 if (!dbg_cnt (ipa_cp_values))
6173 return false;
6175 if (val->self_recursion_generated_p ())
6177 /* The edge counts in this case might not have been adjusted yet.
6178 Nevertheless, even if they were, it would only be guesswork, which
6179 we can do now just as well. The recursive part of the counts can be
6180 derived from the count of the original node anyway. */
6181 if (node->count.ipa ().nonzero_p ())
6183 unsigned dem = self_gen_clones->length () + 1;
6184 rec_count_sum = node->count.ipa () / dem;
6186 else
6187 rec_count_sum = profile_count::zero ();
6190 /* get_info_about_necessary_edges only sums up ipa counts. */
6191 count_sum += rec_count_sum;
6193 if (dump_file && (dump_flags & TDF_DETAILS))
6195 fprintf (dump_file, " - considering value ");
6196 print_ipcp_constant_value (dump_file, val->value);
6197 fprintf (dump_file, " for ");
6198 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6199 if (offset != -1)
6200 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6201 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6204 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6205 freq_sum, count_sum,
6206 val->local_size_cost)
6207 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6208 freq_sum, count_sum, val->prop_size_cost))
6209 return false;
6211 if (dump_file)
6212 fprintf (dump_file, " Creating a specialized node of %s.\n",
6213 node->dump_name ());
6215 vec<tree> known_csts;
6216 vec<ipa_polymorphic_call_context> known_contexts;
6218 callers = gather_edges_for_value (val, node, caller_count);
6219 if (offset == -1)
6220 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6221 else
6223 known_csts = avals->m_known_vals.copy ();
6224 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6226 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6227 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6228 vec<ipa_argagg_value, va_gc> *aggvals
6229 = find_aggregate_values_for_callers_subset (node, callers);
6230 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6231 offset, val->value));
6232 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6233 aggvals, callers);
6235 if (val->self_recursion_generated_p ())
6236 self_gen_clones->safe_push (val->spec_node);
6237 else
6238 update_profiling_info (node, val->spec_node);
6240 callers.release ();
6241 overall_size += val->local_size_cost;
6242 if (dump_file && (dump_flags & TDF_DETAILS))
6243 fprintf (dump_file, " overall size reached %li\n",
6244 overall_size);
6246 /* TODO: If for some lattice there is only one other known value
6247 left, make a special node for it too. */
6249 return true;
6252 /* Like irange::contains_p(), but convert VAL to the type of R if
6253 necessary. */
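/* A hypothetical usage sketch:

     Value_Range r;
     ... set r to the int range [0, 1] ...
     tree val = build_int_cst (long_long_integer_type_node, 1);
     if (ipa_range_contains_p (r, val))  <-- true: 1 fits int and is in r
       ...

   The fits_to_tree_p check guards against VAL not being representable in
   R's type at all, e.g. LLONG_MAX tested against an int range.  */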
6255 static inline bool
6256 ipa_range_contains_p (const vrange &r, tree val)
6258 if (r.undefined_p ())
6259 return false;
6261 tree type = r.type ();
6262 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6263 return false;
6265 val = fold_convert (type, val);
6266 return r.contains_p (val);
6269 /* Decide whether and what specialized clones of NODE should be created. */
6271 static bool
6272 decide_whether_version_node (struct cgraph_node *node)
6274 ipa_node_params *info = ipa_node_params_sum->get (node);
6275 int i, count = ipa_get_param_count (info);
6276 bool ret = false;
6278 if (count == 0)
6279 return false;
6281 if (dump_file && (dump_flags & TDF_DETAILS))
6282 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6283 node->dump_name ());
6285 auto_vec <cgraph_node *, 9> self_gen_clones;
6286 ipa_auto_call_arg_values avals;
6287 gather_context_independent_values (info, &avals, false, NULL);
6289 for (i = 0; i < count;i++)
6291 if (!ipa_is_param_used (info, i))
6292 continue;
6294 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6295 ipcp_lattice<tree> *lat = &plats->itself;
6296 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6298 if (!lat->bottom
6299 && !avals.m_known_vals[i])
6301 ipcp_value<tree> *val;
6302 for (val = lat->values; val; val = val->next)
6304 /* If some values generated for self-recursive calls with
6305 arithmetic jump functions fall outside of the known
6306 range for the parameter, we can skip them. */
6307 if (TREE_CODE (val->value) == INTEGER_CST
6308 && !plats->m_value_range.bottom_p ()
6309 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6310 val->value))
6312 /* This can also happen if a constant present in the source
6313 code falls outside of the range of the parameter's type, so we
6314 cannot assert. */
6315 if (dump_file && (dump_flags & TDF_DETAILS))
6317 fprintf (dump_file, " - skipping%s value ",
6318 val->self_recursion_generated_p ()
6319 ? " self_recursion_generated" : "");
6320 print_ipcp_constant_value (dump_file, val->value);
6321 fprintf (dump_file, " because it is outside known "
6322 "value range.\n");
6324 continue;
6326 ret |= decide_about_value (node, i, -1, val, &avals,
6327 &self_gen_clones);
6331 if (!plats->aggs_bottom)
6333 struct ipcp_agg_lattice *aglat;
6334 ipcp_value<tree> *val;
6335 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6336 if (!aglat->bottom && aglat->values
6337 /* If the following is false, the one value has been considered
6338 for cloning for all contexts. */
6339 && (plats->aggs_contain_variable
6340 || !aglat->is_single_const ()))
6341 for (val = aglat->values; val; val = val->next)
6342 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6343 &self_gen_clones);
6346 if (!ctxlat->bottom
6347 && avals.m_known_contexts[i].useless_p ())
6349 ipcp_value<ipa_polymorphic_call_context> *val;
6350 for (val = ctxlat->values; val; val = val->next)
6351 ret |= decide_about_value (node, i, -1, val, &avals,
6352 &self_gen_clones);
6356 if (!self_gen_clones.is_empty ())
6358 self_gen_clones.safe_push (node);
6359 update_counts_for_self_gen_clones (node, self_gen_clones);
6362 if (info->do_clone_for_all_contexts)
6364 if (!dbg_cnt (ipa_cp_values))
6366 info->do_clone_for_all_contexts = false;
6367 return ret;
6370 struct cgraph_node *clone;
6371 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6373 for (int i = callers.length () - 1; i >= 0; i--)
6375 cgraph_edge *cs = callers[i];
6376 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6378 if (caller_info && caller_info->node_dead)
6379 callers.unordered_remove (i);
6382 if (!adjust_callers_for_value_intersection (callers, node))
6384 /* If the node is not called by anyone, or all its caller edges are
6385 self-recursive, the node is not really in use; there is no need to
6386 do cloning. */
6387 info->do_clone_for_all_contexts = false;
6388 return ret;
6391 if (dump_file)
6392 fprintf (dump_file, " - Creating a specialized node of %s "
6393 "for all known contexts.\n", node->dump_name ());
6395 vec<tree> known_csts = avals.m_known_vals.copy ();
6396 vec<ipa_polymorphic_call_context> known_contexts
6397 = copy_useful_known_contexts (avals.m_known_contexts);
6398 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6399 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6400 vec<ipa_argagg_value, va_gc> *aggvals
6401 = find_aggregate_values_for_callers_subset (node, callers);
6403 if (!known_contexts_useful_p (known_contexts))
6405 known_contexts.release ();
6406 known_contexts = vNULL;
6408 clone = create_specialized_node (node, known_csts, known_contexts,
6409 aggvals, callers);
6410 info->do_clone_for_all_contexts = false;
6411 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6412 ret = true;
6415 return ret;
6418 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6420 static void
6421 spread_undeadness (struct cgraph_node *node)
6423 struct cgraph_edge *cs;
6425 for (cs = node->callees; cs; cs = cs->next_callee)
6426 if (ipa_edge_within_scc (cs))
6428 struct cgraph_node *callee;
6429 class ipa_node_params *info;
6431 callee = cs->callee->function_symbol (NULL);
6432 info = ipa_node_params_sum->get (callee);
6434 if (info && info->node_dead)
6436 info->node_dead = 0;
6437 spread_undeadness (callee);
6442 /* Return true if NODE has a caller from outside of its SCC that is not
6443 dead. Worker callback for cgraph_for_node_and_aliases. */
6445 static bool
6446 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6447 void *data ATTRIBUTE_UNUSED)
6449 struct cgraph_edge *cs;
6451 for (cs = node->callers; cs; cs = cs->next_caller)
6452 if (cs->caller->thunk
6453 && cs->caller->call_for_symbol_thunks_and_aliases
6454 (has_undead_caller_from_outside_scc_p, NULL, true))
6455 return true;
6456 else if (!ipa_edge_within_scc (cs))
6458 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6459 if (!caller_info /* Unoptimized callers are like dead ones. */
6460 || !caller_info->node_dead)
6461 return true;
6463 return false;
6467 /* Identify nodes within the same SCC as NODE which are no longer needed
6468 because of new clones and will be removed as unreachable. */
6470 static void
6471 identify_dead_nodes (struct cgraph_node *node)
6473 struct cgraph_node *v;
6474 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6475 if (v->local)
6477 ipa_node_params *info = ipa_node_params_sum->get (v);
6478 if (info
6479 && !v->call_for_symbol_thunks_and_aliases
6480 (has_undead_caller_from_outside_scc_p, NULL, true))
6481 info->node_dead = 1;
6484 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6486 ipa_node_params *info = ipa_node_params_sum->get (v);
6487 if (info && !info->node_dead)
6488 spread_undeadness (v);
6491 if (dump_file && (dump_flags & TDF_DETAILS))
6493 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6494 if (ipa_node_params_sum->get (v)
6495 && ipa_node_params_sum->get (v)->node_dead)
6496 fprintf (dump_file, " Marking node as dead: %s.\n",
6497 v->dump_name ());
6501 /* The decision stage. Iterate over the topological order of call graph nodes
6502 TOPO and make specialized clones if deemed beneficial. */
6504 static void
6505 ipcp_decision_stage (class ipa_topo_info *topo)
6507 int i;
6509 if (dump_file)
6510 fprintf (dump_file, "\nIPA decision stage:\n\n");
6512 for (i = topo->nnodes - 1; i >= 0; i--)
6514 struct cgraph_node *node = topo->order[i];
6515 bool change = false, iterate = true;
6517 while (iterate)
6519 struct cgraph_node *v;
6520 iterate = false;
6521 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6522 if (v->has_gimple_body_p ()
6523 && ipcp_versionable_function_p (v))
6524 iterate |= decide_whether_version_node (v);
6526 change |= iterate;
6528 if (change)
6529 identify_dead_nodes (node);
6533 /* Look up all VR and bits information that we have discovered and copy it
6534 over to the transformation summary. */
6536 static void
6537 ipcp_store_vr_results (void)
6539 cgraph_node *node;
6541 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6543 ipa_node_params *info = ipa_node_params_sum->get (node);
6544 bool dumped_sth = false;
6545 bool found_useful_result = false;
6546 bool do_vr = true;
6547 bool do_bits = true;
6549 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6551 if (dump_file)
6552 fprintf (dump_file, "Not considering %s for VR discovery "
6553 "and propagate; -fipa-ipa-vrp: disabled.\n",
6554 node->dump_name ());
6555 do_vr = false;
6557 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6559 if (dump_file)
6560 fprintf (dump_file, "Not considering %s for ipa bitwise "
6561 "propagation ; -fipa-bit-cp: disabled.\n",
6562 node->dump_name ());
6563 do_bits = false;
6565 if (!do_bits && !do_vr)
6566 continue;
6568 if (info->ipcp_orig_node)
6569 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6570 if (!info->lattices)
6571 /* Newly expanded artificial thunks do not have lattices. */
6572 continue;
6574 unsigned count = ipa_get_param_count (info);
6575 for (unsigned i = 0; i < count; i++)
6577 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6578 if (do_vr
6579 && !plats->m_value_range.bottom_p ()
6580 && !plats->m_value_range.top_p ())
6582 found_useful_result = true;
6583 break;
6585 if (do_bits && plats->bits_lattice.constant_p ())
6587 found_useful_result = true;
6588 break;
6591 if (!found_useful_result)
6592 continue;
6594 ipcp_transformation_initialize ();
6595 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6596 vec_safe_reserve_exact (ts->m_vr, count);
6598 for (unsigned i = 0; i < count; i++)
6600 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6601 ipcp_bits_lattice *bits = NULL;
6603 if (do_bits
6604 && plats->bits_lattice.constant_p ()
6605 && dbg_cnt (ipa_cp_bits))
6606 bits = &plats->bits_lattice;
6608 if (do_vr
6609 && !plats->m_value_range.bottom_p ()
6610 && !plats->m_value_range.top_p ()
6611 && dbg_cnt (ipa_cp_vr))
6613 if (bits)
6615 Value_Range tmp = plats->m_value_range.m_vr;
6616 tree type = ipa_get_type (info, i);
6617 irange &r = as_a<irange> (tmp);
6618 irange_bitmask bm (wide_int::from (bits->get_value (),
6619 TYPE_PRECISION (type),
6620 TYPE_SIGN (type)),
6621 wide_int::from (bits->get_mask (),
6622 TYPE_PRECISION (type),
6623 TYPE_SIGN (type)));
6624 r.update_bitmask (bm);
6625 ipa_vr vr (tmp);
6626 ts->m_vr->quick_push (vr);
6628 else
6630 ipa_vr vr (plats->m_value_range.m_vr);
6631 ts->m_vr->quick_push (vr);
6634 else if (bits)
6636 tree type = ipa_get_type (info, i);
6637 Value_Range tmp;
6638 tmp.set_varying (type);
6639 irange &r = as_a<irange> (tmp);
6640 irange_bitmask bm (wide_int::from (bits->get_value (),
6641 TYPE_PRECISION (type),
6642 TYPE_SIGN (type)),
6643 wide_int::from (bits->get_mask (),
6644 TYPE_PRECISION (type),
6645 TYPE_SIGN (type)));
6646 r.update_bitmask (bm);
6647 ipa_vr vr (tmp);
6648 ts->m_vr->quick_push (vr);
6650 else
6652 ipa_vr vr;
6653 ts->m_vr->quick_push (vr);
6656 if (!dump_file || !bits)
6657 continue;
6659 if (!dumped_sth)
6661 fprintf (dump_file, "Propagated bits info for function %s:\n",
6662 node->dump_name ());
6663 dumped_sth = true;
6665 fprintf (dump_file, " param %i: value = ", i);
6666 print_hex (bits->get_value (), dump_file);
6667 fprintf (dump_file, ", mask = ");
6668 print_hex (bits->get_mask (), dump_file);
6669 fprintf (dump_file, "\n");
6674 /* The IPCP driver. */
6676 static unsigned int
6677 ipcp_driver (void)
6679 class ipa_topo_info topo;
6681 if (edge_clone_summaries == NULL)
6682 edge_clone_summaries = new edge_clone_summary_t (symtab);
6684 ipa_check_create_node_params ();
6685 ipa_check_create_edge_args ();
6686 clone_num_suffixes = new hash_map<const char *, unsigned>;
6688 if (dump_file)
6690 fprintf (dump_file, "\nIPA structures before propagation:\n");
6691 if (dump_flags & TDF_DETAILS)
6692 ipa_print_all_params (dump_file);
6693 ipa_print_all_jump_functions (dump_file);
6696 /* Topological sort. */
6697 build_toporder_info (&topo);
6698 /* Do the interprocedural propagation. */
6699 ipcp_propagate_stage (&topo);
6700 /* Decide what constant propagation and cloning should be performed. */
6701 ipcp_decision_stage (&topo);
6702 /* Store results of value range and bits propagation. */
6703 ipcp_store_vr_results ();
6705 /* Free all IPCP structures. */
6706 delete clone_num_suffixes;
6707 free_toporder_info (&topo);
6708 delete edge_clone_summaries;
6709 edge_clone_summaries = NULL;
6710 ipa_free_all_structures_after_ipa_cp ();
6711 if (dump_file)
6712 fprintf (dump_file, "\nIPA constant propagation end\n");
6713 return 0;
6716 /* Initialization and computation of IPCP data structures. This is the initial
6717 intraprocedural analysis of functions, which gathers information to be
6718 propagated later on. */
6720 static void
6721 ipcp_generate_summary (void)
6723 struct cgraph_node *node;
6725 if (dump_file)
6726 fprintf (dump_file, "\nIPA constant propagation start:\n");
6727 ipa_register_cgraph_hooks ();
6729 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6730 ipa_analyze_node (node);
6733 namespace {
6735 const pass_data pass_data_ipa_cp =
6737 IPA_PASS, /* type */
6738 "cp", /* name */
6739 OPTGROUP_NONE, /* optinfo_flags */
6740 TV_IPA_CONSTANT_PROP, /* tv_id */
6741 0, /* properties_required */
6742 0, /* properties_provided */
6743 0, /* properties_destroyed */
6744 0, /* todo_flags_start */
6745 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6748 class pass_ipa_cp : public ipa_opt_pass_d
6750 public:
6751 pass_ipa_cp (gcc::context *ctxt)
6752 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6753 ipcp_generate_summary, /* generate_summary */
6754 NULL, /* write_summary */
6755 NULL, /* read_summary */
6756 ipcp_write_transformation_summaries, /*
6757 write_optimization_summary */
6758 ipcp_read_transformation_summaries, /*
6759 read_optimization_summary */
6760 NULL, /* stmt_fixup */
6761 0, /* function_transform_todo_flags_start */
6762 ipcp_transform_function, /* function_transform */
6763 NULL) /* variable_transform */
6766 /* opt_pass methods: */
6767 bool gate (function *) final override
6769 /* FIXME: We should remove the optimize check after we ensure we never run
6770 IPA passes when not optimizing. */
6771 return (flag_ipa_cp && optimize) || in_lto_p;
6774 unsigned int execute (function *) final override { return ipcp_driver (); }
6776 }; // class pass_ipa_cp
6778 } // anon namespace
6780 ipa_opt_pass_d *
6781 make_pass_ipa_cp (gcc::context *ctxt)
6783 return new pass_ipa_cp (ctxt);
6786 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6787 within the same process. For use by toplev::finalize. */
6789 void
6790 ipa_cp_cc_finalize (void)
6792 base_count = profile_count::uninitialized ();
6793 overall_size = 0;
6794 orig_overall_size = 0;
6795 ipcp_free_transformation_sum ();
6798 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6799 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6800 DECL_UID-sorted vector if available (which is pre-computed only if there are
6801 many parameters). Can return -1 if PARAM is the static chain, which is not
6802 represented among DECL_ARGUMENTS. */
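/* A hypothetical usage sketch, with TS being the transformation summary of
   FNDECL:

     void fn (int a, int b);   <-- PARAM is the PARM_DECL of b

     int idx = ts->get_param_index (fndecl, param);  <-- yields 1

   For functions with many parameters the lookup below is a binary search in
   m_uid_to_idx rather than a linear walk of DECL_ARGUMENTS.  */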
6804 int
6805 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6807 gcc_assert (TREE_CODE (param) == PARM_DECL);
6808 if (m_uid_to_idx)
6810 unsigned puid = DECL_UID (param);
6811 const ipa_uid_to_idx_map_elt *res
6812 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6813 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6815 return elt.uid < uid;
6817 if (res == m_uid_to_idx->end ()
6818 || res->uid != puid)
6820 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6821 return -1;
6823 return res->index;
6826 unsigned index = 0;
6827 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6828 if (p == param)
6829 return (int) index;
6831 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6832 return -1;
6835 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6836 according to the uid. */
6838 static int
6839 compare_uids (const void *a, const void *b)
6841 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6842 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6843 if (e1->uid < e2->uid)
6844 return -1;
6845 if (e1->uid > e2->uid)
6846 return 1;
6847 gcc_unreachable ();
6850 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6851 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6852 from parameters to their indices in the DECL_ARGUMENTS chain. */
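/* E.g. (hypothetical), for a function with parameters a, b, c, ... the
   vector would hold {DECL_UID (a), 0}, {DECL_UID (b), 1}, ... sorted by
   uid, which is what the binary search in get_param_index expects.  */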
6854 void
6855 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6857 int c = count_formal_params (fndecl);
6858 if (c < 32)
6859 return;
6861 m_uid_to_idx = NULL;
6862 vec_safe_reserve (m_uid_to_idx, c, true);
6863 unsigned index = 0;
6864 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6866 ipa_uid_to_idx_map_elt elt;
6867 elt.uid = DECL_UID (p);
6868 elt.index = index;
6869 m_uid_to_idx->quick_push (elt);
6871 m_uid_to_idx->qsort (compare_uids);