1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 	 certain contexts but the estimated tradeoff between speedup and size
34 	 cost is deemed good.
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 	 A jump function for a call-site represents the values passed as actual
60 	 arguments of a given call-site. In principle, there are three types of
61 	 values:
63 	 Pass through - the caller's formal parameter is passed as an actual
64 	 argument, possibly with an operation performed on it first.
65 	 Constant - a constant is passed as an actual argument.
66 	 Unknown - neither of the above.
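	 For illustration, in a hypothetical caller like the one below, the call
	 to g would get a pass-through jump function (with a PLUS operation) for
	 its first argument, a constant jump function for the second one and an
	 unknown jump function for the third one:

	 extern int opaque (void);
	 extern void g (int a, int b, int c);

	 static void f (int x)
	 {
	 g (x + 1, 7, opaque ());
	 }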
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 	 It uses a different algorithm than the original paper by Callahan et al.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 	 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 	 second stage.
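	 As a hypothetical example of the decisions made in this stage, consider

	 static int mult (int m, int n)
	 {
	 return m * n;
	 }

	 called only as mult (i, 8) and mult (j, 8) in the whole program. The
	 lattice of the second parameter then holds the single constant 8 in all
	 contexts, so mult itself can be transformed to assume n == 8. If there
	 were also a call mult (k, 16), the pass could instead create a
	 specialized clone for the n == 16 contexts, provided the estimated
	 size/speed tradeoff were deemed good.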
96 Third phase - materialization of clones, call statement updates.
97 	 ================================================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 	 and tree-inline.cc) according to instructions inserted into the call graph by
101 the second stage. */
103 #define INCLUDE_ALGORITHM
104 #include "config.h"
105 #include "system.h"
106 #include "coretypes.h"
107 #include "backend.h"
108 #include "tree.h"
109 #include "gimple-expr.h"
110 #include "gimple.h"
111 #include "predict.h"
112 #include "alloc-pool.h"
113 #include "tree-pass.h"
114 #include "cgraph.h"
115 #include "diagnostic.h"
116 #include "fold-const.h"
117 #include "gimple-iterator.h"
118 #include "gimple-fold.h"
119 #include "symbol-summary.h"
120 #include "tree-vrp.h"
121 #include "ipa-prop.h"
122 #include "tree-pretty-print.h"
123 #include "tree-inline.h"
124 #include "ipa-fnsummary.h"
125 #include "ipa-utils.h"
126 #include "tree-ssa-ccp.h"
127 #include "stringpool.h"
128 #include "attribs.h"
129 #include "dbgcnt.h"
130 #include "symtab-clones.h"
131 #include "gimple-range.h"
133 template <typename valtype> class ipcp_value;
135 /* Describes a particular source for an IPA-CP value. */
137 template <typename valtype>
138 struct ipcp_value_source
140 public:
141 	/* Aggregate offset of the source, negative if the source is the scalar value
142 	 of the argument itself. */
143 HOST_WIDE_INT offset;
144 /* The incoming edge that brought the value. */
145 cgraph_edge *cs;
146 	/* If the jump function that resulted in this value was a pass-through or an
147 ancestor, this is the ipcp_value of the caller from which the described
148 value has been derived. Otherwise it is NULL. */
149 ipcp_value<valtype> *val;
150 /* Next pointer in a linked list of sources of a value. */
151 ipcp_value_source *next;
152 	/* If the jump function that resulted in this value was a pass-through or an
153 ancestor, this is the index of the parameter of the caller the jump
154 function references. */
155 int index;
158 /* Common ancestor for all ipcp_value instantiations. */
160 class ipcp_value_base
162 public:
163 	/* Time benefit that specializing the function for this value would bring
164 about in this function alone. */
165 sreal local_time_benefit;
166 /* Time benefit that specializing the function for this value can bring about
167 	 in its callees. */
168 sreal prop_time_benefit;
169 /* Size cost that specializing the function for this value would bring about
170 in this function alone. */
171 int local_size_cost;
172 /* Size cost that specializing the function for this value can bring about in
173 	 its callees. */
174 int prop_size_cost;
176 ipcp_value_base ()
177 : local_time_benefit (0), prop_time_benefit (0),
178 local_size_cost (0), prop_size_cost (0) {}
181 /* Describes one particular value stored in struct ipcp_lattice. */
183 template <typename valtype>
184 class ipcp_value : public ipcp_value_base
186 public:
187 /* The actual value for the given parameter. */
188 valtype value;
189 /* The list of sources from which this value originates. */
190 ipcp_value_source <valtype> *sources = nullptr;
191 /* Next pointers in a linked list of all values in a lattice. */
192 ipcp_value *next = nullptr;
193 /* Next pointers in a linked list of values in a strongly connected component
194 of values. */
195 ipcp_value *scc_next = nullptr;
196 /* Next pointers in a linked list of SCCs of values sorted topologically
197 	 according to their sources. */
198 ipcp_value *topo_next = nullptr;
199 /* A specialized node created for this value, NULL if none has been (so far)
200 created. */
201 cgraph_node *spec_node = nullptr;
202 /* Depth first search number and low link for topological sorting of
203 values. */
204 int dfs = 0;
205 int low_link = 0;
206 /* SCC number to identify values which recursively feed into each other.
207 Values in the same SCC have the same SCC number. */
208 int scc_no = 0;
209 	/* Non-zero if the value is generated from another value in the same lattice
210 	 for a self-recursive call; the actual number is how many times the
211 	 operation has been performed. In the unlikely event of the value being
212 	 present in two self-recursive value generation chains, it is the
213 	 maximum. */
214 unsigned self_recursion_generated_level = 0;
215 /* True if this value is currently on the topo-sort stack. */
216 bool on_stack = false;
218 void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
219 HOST_WIDE_INT offset);
221 /* Return true if both THIS value and O feed into each other. */
223 bool same_scc (const ipcp_value<valtype> *o)
225 return o->scc_no == scc_no;
228 	/* Return true if this value has been generated for a self-recursive call as
229 	 a result of an arithmetic pass-through jump-function acting on a value in
230 	 the same lattice. */
232 bool self_recursion_generated_p ()
234 return self_recursion_generated_level > 0;
238 /* Lattice describing potential values of a formal parameter of a function, or
239 a part of an aggregate. TOP is represented by a lattice with zero values
240 and with contains_variable and bottom flags cleared. BOTTOM is represented
241 by a lattice with the bottom flag set. In that case, values and
242 contains_variable flag should be disregarded. */
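/* For instance (an illustrative summary only): a freshly initialized lattice
   is TOP; after add_value has recorded constants 4 and 8 it holds two values;
   if set_contains_variable is called on top of that, the lattice describes
   "4, 8 or something unknown"; and once set_to_bottom has been called, the
   values are disregarded and nothing useful can be propagated.  */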
244 template <typename valtype>
245 struct ipcp_lattice
247 public:
248 /* The list of known values and types in this lattice. Note that values are
249 not deallocated if a lattice is set to bottom because there may be value
250 sources referencing them. */
251 ipcp_value<valtype> *values;
252 /* Number of known values and types in this lattice. */
253 int values_count;
254 /* The lattice contains a variable component (in addition to values). */
255 bool contains_variable;
256 /* The value of the lattice is bottom (i.e. variable and unusable for any
257 propagation). */
258 bool bottom;
260 inline bool is_single_const ();
261 inline bool set_to_bottom ();
262 inline bool set_contains_variable ();
263 bool add_value (valtype newval, cgraph_edge *cs,
264 ipcp_value<valtype> *src_val = NULL,
265 int src_idx = 0, HOST_WIDE_INT offset = -1,
266 ipcp_value<valtype> **val_p = NULL,
267 unsigned same_lat_gen_level = 0);
268 void print (FILE * f, bool dump_sources, bool dump_benefits);
271 /* Lattice of tree values with an offset to describe a part of an
272 aggregate. */
274 struct ipcp_agg_lattice : public ipcp_lattice<tree>
276 public:
277 /* Offset that is being described by this lattice. */
278 HOST_WIDE_INT offset;
279 /* Size so that we don't have to re-compute it every time we traverse the
280 list. Must correspond to TYPE_SIZE of all lat values. */
281 HOST_WIDE_INT size;
282 /* Next element of the linked list. */
283 struct ipcp_agg_lattice *next;
286 /* Lattice of known bits, only capable of holding one value.
287 Bitwise constant propagation propagates which bits of a
288 value are constant.
289 	 For example:
290 	 int f(int x)
291 	 {
292 	 return some_op (x);
293 	 }
295 	 int f1(int y)
296 	 {
297 	 if (cond)
298 	 return f (y & 0xff);
299 	 else
300 	 return f (y & 0xf);
301 	 }
303 	 In the above case, the param 'x' will always have all the bits
304 	 (except those in the least significant byte) set to 0.
305 	 Hence the mask of 'x' would be 0xff, reflecting that the low
306 	 eight bits are unknown while the rest are known to be zero.
307 	 The actual propagated value is given by m_value & ~m_mask. */
309 class ipcp_bits_lattice
311 public:
312 bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
313 bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
314 bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
315 bool set_to_bottom ();
316 bool set_to_constant (widest_int, widest_int);
317 bool known_nonzero_p () const;
319 widest_int get_value () const { return m_value; }
320 widest_int get_mask () const { return m_mask; }
322 bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
323 enum tree_code, tree, bool);
325 bool meet_with (widest_int, widest_int, unsigned);
327 void print (FILE *);
329 private:
330 enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
332 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
333 If a bit in mask is set to 0, then the corresponding bit in
334 value is known to be constant. */
335 widest_int m_value, m_mask;
337 bool meet_with_1 (widest_int, widest_int, unsigned, bool);
338 void get_value_and_mask (tree, widest_int *, widest_int *);
341 /* Lattice of value ranges. */
343 class ipcp_vr_lattice
345 public:
346 Value_Range m_vr;
348 inline bool bottom_p () const;
349 inline bool top_p () const;
350 inline bool set_to_bottom ();
351 bool meet_with (const vrange &p_vr);
352 bool meet_with (const ipcp_vr_lattice &other);
353 void init (tree type);
354 void print (FILE * f);
356 private:
357 bool meet_with_1 (const vrange &other_vr);
360 inline void
361 ipcp_vr_lattice::init (tree type)
363 if (type)
364 m_vr.set_type (type);
366 // Otherwise m_vr will default to unsupported_range.
369 /* Structure containing lattices for a parameter itself and for pieces of
370 aggregates that are passed in the parameter or by a reference in a parameter
371 plus some other useful flags. */
373 class ipcp_param_lattices
375 public:
376 /* Lattice describing the value of the parameter itself. */
377 ipcp_lattice<tree> itself;
378 /* Lattice describing the polymorphic contexts of a parameter. */
379 ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
380 /* Lattices describing aggregate parts. */
381 ipcp_agg_lattice *aggs;
382 /* Lattice describing known bits. */
383 ipcp_bits_lattice bits_lattice;
384 /* Lattice describing value range. */
385 ipcp_vr_lattice m_value_range;
386 /* Number of aggregate lattices */
387 int aggs_count;
388 /* True if aggregate data were passed by reference (as opposed to by
389 value). */
390 bool aggs_by_ref;
391 /* All aggregate lattices contain a variable component (in addition to
392 values). */
393 bool aggs_contain_variable;
394 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
395 for any propagation). */
396 bool aggs_bottom;
398 /* There is a virtual call based on this parameter. */
399 bool virt_call;
402 /* Allocation pools for values and their sources in ipa-cp. */
404 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
405 ("IPA-CP constant values");
407 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
408 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
410 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
411 ("IPA-CP value sources");
413 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
414 ("IPA_CP aggregate lattices");
416 /* Base count to use in heuristics when using profile feedback. */
418 static profile_count base_count;
420 /* Original overall size of the program. */
422 static long overall_size, orig_overall_size;
424 /* Node name to unique clone suffix number map. */
425 static hash_map<const char *, unsigned> *clone_num_suffixes;
427 /* Return the param lattices structure corresponding to the Ith formal
428 parameter of the function described by INFO. */
429 static inline class ipcp_param_lattices *
430 ipa_get_parm_lattices (class ipa_node_params *info, int i)
432 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
433 gcc_checking_assert (!info->ipcp_orig_node);
434 gcc_checking_assert (info->lattices);
435 return &(info->lattices[i]);
438 /* Return the lattice corresponding to the scalar value of the Ith formal
439 parameter of the function described by INFO. */
440 static inline ipcp_lattice<tree> *
441 ipa_get_scalar_lat (class ipa_node_params *info, int i)
443 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
444 return &plats->itself;
447 	/* Return the lattice corresponding to the polymorphic call contexts of the
448 	 Ith formal parameter of the function described by INFO. */
449 static inline ipcp_lattice<ipa_polymorphic_call_context> *
450 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
452 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
453 return &plats->ctxlat;
456 /* Return whether LAT is a lattice with a single constant and without an
457 undefined value. */
459 template <typename valtype>
460 inline bool
461 ipcp_lattice<valtype>::is_single_const ()
463 if (bottom || contains_variable || values_count != 1)
464 return false;
465 else
466 return true;
469 /* Return true iff X and Y should be considered equal values by IPA-CP. */
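/* For example, under this definition two ADDR_EXPRs of two distinct
   constant-pool decls that were both initialized from the same string literal
   compare equal, because their DECL_INITIALs are operand_equal_p, even though
   the decls themselves differ.  */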
471 static bool
472 values_equal_for_ipcp_p (tree x, tree y)
474 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
476 if (x == y)
477 return true;
479 if (TREE_CODE (x) == ADDR_EXPR
480 && TREE_CODE (y) == ADDR_EXPR
481 && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
482 || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
483 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
484 && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
485 || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
486 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
487 return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
488 || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
489 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
490 else
491 return operand_equal_p (x, y, 0);
494 /* Print V which is extracted from a value in a lattice to F. */
496 static void
497 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
499 v.dump(f, false);
502 /* Print a lattice LAT to F. */
504 template <typename valtype>
505 void
506 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
508 ipcp_value<valtype> *val;
509 bool prev = false;
511 if (bottom)
513 fprintf (f, "BOTTOM\n");
514 return;
517 if (!values_count && !contains_variable)
519 fprintf (f, "TOP\n");
520 return;
523 if (contains_variable)
525 fprintf (f, "VARIABLE");
526 prev = true;
527 if (dump_benefits)
528 fprintf (f, "\n");
531 for (val = values; val; val = val->next)
533 if (dump_benefits && prev)
534 fprintf (f, " ");
535 else if (!dump_benefits && prev)
536 fprintf (f, ", ");
537 else
538 prev = true;
540 print_ipcp_constant_value (f, val->value);
542 if (dump_sources)
544 ipcp_value_source<valtype> *s;
546 if (val->self_recursion_generated_p ())
547 fprintf (f, " [self_gen(%i), from:",
548 val->self_recursion_generated_level);
549 else
550 fprintf (f, " [scc: %i, from:", val->scc_no);
551 for (s = val->sources; s; s = s->next)
552 fprintf (f, " %i(%f)", s->cs->caller->order,
553 s->cs->sreal_frequency ().to_double ());
554 fprintf (f, "]");
557 if (dump_benefits)
558 fprintf (f, " [loc_time: %g, loc_size: %i, "
559 "prop_time: %g, prop_size: %i]\n",
560 val->local_time_benefit.to_double (), val->local_size_cost,
561 val->prop_time_benefit.to_double (), val->prop_size_cost);
563 if (!dump_benefits)
564 fprintf (f, "\n");
567 void
568 ipcp_bits_lattice::print (FILE *f)
570 if (top_p ())
571 fprintf (f, " Bits unknown (TOP)\n");
572 else if (bottom_p ())
573 fprintf (f, " Bits unusable (BOTTOM)\n");
574 else
576 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
577 fprintf (f, ", mask = "); print_hex (get_mask (), f);
578 fprintf (f, "\n");
582 /* Print value range lattice to F. */
584 void
585 ipcp_vr_lattice::print (FILE * f)
587 m_vr.dump (f);
590 /* Print all ipcp_lattices of all functions to F. */
592 static void
593 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
595 struct cgraph_node *node;
596 int i, count;
598 fprintf (f, "\nLattices:\n");
599 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
601 class ipa_node_params *info;
603 info = ipa_node_params_sum->get (node);
604 /* Skip unoptimized functions and constprop clones since we don't make
605 lattices for them. */
606 if (!info || info->ipcp_orig_node)
607 continue;
608 fprintf (f, " Node: %s:\n", node->dump_name ());
609 count = ipa_get_param_count (info);
610 for (i = 0; i < count; i++)
612 struct ipcp_agg_lattice *aglat;
613 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
614 fprintf (f, " param [%d]: ", i);
615 plats->itself.print (f, dump_sources, dump_benefits);
616 fprintf (f, " ctxs: ");
617 plats->ctxlat.print (f, dump_sources, dump_benefits);
618 plats->bits_lattice.print (f);
619 fprintf (f, " ");
620 plats->m_value_range.print (f);
621 fprintf (f, "\n");
622 if (plats->virt_call)
623 fprintf (f, " virt_call flag set\n");
625 if (plats->aggs_bottom)
627 fprintf (f, " AGGS BOTTOM\n");
628 continue;
630 if (plats->aggs_contain_variable)
631 fprintf (f, " AGGS VARIABLE\n");
632 for (aglat = plats->aggs; aglat; aglat = aglat->next)
634 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
635 plats->aggs_by_ref ? "ref " : "", aglat->offset);
636 aglat->print (f, dump_sources, dump_benefits);
642 /* Determine whether it is at all technically possible to create clones of NODE
643 and store this information in the ipa_node_params structure associated
644 with NODE. */
646 static void
647 determine_versionability (struct cgraph_node *node,
648 class ipa_node_params *info)
650 const char *reason = NULL;
652 /* There are a number of generic reasons functions cannot be versioned. We
653 also cannot remove parameters if there are type attributes such as fnspec
654 present. */
655 if (node->alias || node->thunk)
656 reason = "alias or thunk";
657 else if (!node->versionable)
658 reason = "not a tree_versionable_function";
659 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
660 reason = "insufficient body availability";
661 else if (!opt_for_fn (node->decl, optimize)
662 || !opt_for_fn (node->decl, flag_ipa_cp))
663 reason = "non-optimized function";
664 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
666 /* Ideally we should clone the SIMD clones themselves and create
667 vector copies of them, so IPA-cp and SIMD clones can happily
668 coexist, but that may not be worth the effort. */
669 reason = "function has SIMD clones";
671 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
673 /* Ideally we should clone the target clones themselves and create
674 copies of them, so IPA-cp and target clones can happily
675 coexist, but that may not be worth the effort. */
676 reason = "function target_clones attribute";
678 	 /* Don't clone decls local to a comdat group; it breaks, and for C++
679 	 decloned constructors inlining is always better anyway. */
680 else if (node->comdat_local_p ())
681 reason = "comdat-local function";
682 else if (node->calls_comdat_local)
684 /* TODO: call is versionable if we make sure that all
685 callers are inside of a comdat group. */
686 reason = "calls comdat-local function";
689 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
690 work only when inlined. Cloning them may still lead to better code
691 because ipa-cp will not give up on cloning further. If the function is
692 	 external this however leads to wrong code because we may end up producing an
693 offline copy of the function. */
694 if (DECL_EXTERNAL (node->decl))
695 for (cgraph_edge *edge = node->callees; !reason && edge;
696 edge = edge->next_callee)
697 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
699 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
700 reason = "external function which calls va_arg_pack";
701 if (DECL_FUNCTION_CODE (edge->callee->decl)
702 == BUILT_IN_VA_ARG_PACK_LEN)
703 reason = "external function which calls va_arg_pack_len";
706 if (reason && dump_file && !node->alias && !node->thunk)
707 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
708 node->dump_name (), reason);
710 info->versionable = (reason == NULL);
713 	/* Return true if it is at all technically possible to create clones of
714 	 NODE. */
716 static bool
717 ipcp_versionable_function_p (struct cgraph_node *node)
719 ipa_node_params *info = ipa_node_params_sum->get (node);
720 return info && info->versionable;
723 /* Structure holding accumulated information about callers of a node. */
725 struct caller_statistics
727 /* If requested (see below), self-recursive call counts are summed into this
728 field. */
729 profile_count rec_count_sum;
730 /* The sum of all ipa counts of all the other (non-recursive) calls. */
731 profile_count count_sum;
732 /* Sum of all frequencies for all calls. */
733 sreal freq_sum;
734 /* Number of calls and hot calls respectively. */
735 int n_calls, n_hot_calls;
736 /* If itself is set up, also count the number of non-self-recursive
737 calls. */
738 int n_nonrec_calls;
739 /* If non-NULL, this is the node itself and calls from it should have their
740 counts included in rec_count_sum and not count_sum. */
741 cgraph_node *itself;
744 	/* Initialize fields of STATS to zeroes and optionally set it up so that
745 	 self-recursive calls from ITSELF are accounted separately. */
747 static inline void
748 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
750 stats->rec_count_sum = profile_count::zero ();
751 stats->count_sum = profile_count::zero ();
752 stats->n_calls = 0;
753 stats->n_hot_calls = 0;
754 stats->n_nonrec_calls = 0;
755 stats->freq_sum = 0;
756 stats->itself = itself;
759 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
760 non-thunk incoming edges to NODE. */
762 static bool
763 gather_caller_stats (struct cgraph_node *node, void *data)
765 struct caller_statistics *stats = (struct caller_statistics *) data;
766 struct cgraph_edge *cs;
768 for (cs = node->callers; cs; cs = cs->next_caller)
769 if (!cs->caller->thunk)
771 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
772 if (info && info->node_dead)
773 continue;
775 if (cs->count.ipa ().initialized_p ())
777 if (stats->itself && stats->itself == cs->caller)
778 stats->rec_count_sum += cs->count.ipa ();
779 else
780 stats->count_sum += cs->count.ipa ();
782 stats->freq_sum += cs->sreal_frequency ();
783 stats->n_calls++;
784 if (stats->itself && stats->itself != cs->caller)
785 stats->n_nonrec_calls++;
787 if (cs->maybe_hot_p ())
788 stats->n_hot_calls ++;
790 return false;
794 	/* Return true if this NODE is a viable candidate for cloning. */
796 static bool
797 ipcp_cloning_candidate_p (struct cgraph_node *node)
799 struct caller_statistics stats;
801 gcc_checking_assert (node->has_gimple_body_p ());
803 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
805 if (dump_file)
806 fprintf (dump_file, "Not considering %s for cloning; "
807 "-fipa-cp-clone disabled.\n",
808 node->dump_name ());
809 return false;
812 if (node->optimize_for_size_p ())
814 if (dump_file)
815 fprintf (dump_file, "Not considering %s for cloning; "
816 "optimizing it for size.\n",
817 node->dump_name ());
818 return false;
821 init_caller_stats (&stats);
822 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
824 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
826 if (dump_file)
827 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
828 node->dump_name ());
829 return true;
832 	 /* When profile is available and function is hot, propagate into it even if
833 	 the calls seem cold; constant propagation can improve the function's speed
834 	 significantly. */
835 if (stats.count_sum > profile_count::zero ()
836 && node->count.ipa ().initialized_p ())
838 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
840 if (dump_file)
841 fprintf (dump_file, "Considering %s for cloning; "
842 "usually called directly.\n",
843 node->dump_name ());
844 return true;
847 if (!stats.n_hot_calls)
849 if (dump_file)
850 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
851 node->dump_name ());
852 return false;
854 if (dump_file)
855 fprintf (dump_file, "Considering %s for cloning.\n",
856 node->dump_name ());
857 return true;
860 template <typename valtype>
861 class value_topo_info
863 public:
864 /* Head of the linked list of topologically sorted values. */
865 ipcp_value<valtype> *values_topo;
866 /* Stack for creating SCCs, represented by a linked list too. */
867 ipcp_value<valtype> *stack;
868 	/* Counter driving the algorithm in add_val. */
869 int dfs_counter;
871 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
873 void add_val (ipcp_value<valtype> *cur_val);
874 void propagate_effects ();
877 	/* Arrays representing a topological ordering of call graph nodes and a stack
878 	 of nodes used during constant propagation, and also data required to perform
879 	 a topological sort of values and propagation of benefits in the determined
880 	 order. */
882 class ipa_topo_info
884 public:
885 /* Array with obtained topological order of cgraph nodes. */
886 struct cgraph_node **order;
887 /* Stack of cgraph nodes used during propagation within SCC until all values
888 in the SCC stabilize. */
889 struct cgraph_node **stack;
890 int nnodes, stack_top;
892 value_topo_info<tree> constants;
893 value_topo_info<ipa_polymorphic_call_context> contexts;
895 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
896 constants ()
900 	/* Skip edges from and to nodes without ipa_cp enabled.
901 	 Ignore unavailable symbols. */
903 static bool
904 ignore_edge_p (cgraph_edge *e)
906 enum availability avail;
907 cgraph_node *ultimate_target
908 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
910 return (avail <= AVAIL_INTERPOSABLE
911 || !opt_for_fn (ultimate_target->decl, optimize)
912 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
915 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
917 static void
918 build_toporder_info (class ipa_topo_info *topo)
920 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
921 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
923 gcc_checking_assert (topo->stack_top == 0);
924 topo->nnodes = ipa_reduced_postorder (topo->order, true,
925 ignore_edge_p);
928 /* Free information about strongly connected components and the arrays in
929 TOPO. */
931 static void
932 free_toporder_info (class ipa_topo_info *topo)
934 ipa_free_postorder_info ();
935 free (topo->order);
936 free (topo->stack);
939 /* Add NODE to the stack in TOPO, unless it is already there. */
941 static inline void
942 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
944 ipa_node_params *info = ipa_node_params_sum->get (node);
945 if (info->node_enqueued)
946 return;
947 info->node_enqueued = 1;
948 topo->stack[topo->stack_top++] = node;
951 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
952 is empty. */
954 static struct cgraph_node *
955 pop_node_from_stack (class ipa_topo_info *topo)
957 if (topo->stack_top)
959 struct cgraph_node *node;
960 topo->stack_top--;
961 node = topo->stack[topo->stack_top];
962 ipa_node_params_sum->get (node)->node_enqueued = 0;
963 return node;
965 else
966 return NULL;
969 /* Set lattice LAT to bottom and return true if it previously was not set as
970 such. */
972 template <typename valtype>
973 inline bool
974 ipcp_lattice<valtype>::set_to_bottom ()
976 bool ret = !bottom;
977 bottom = true;
978 return ret;
981 /* Mark lattice as containing an unknown value and return true if it previously
982 was not marked as such. */
984 template <typename valtype>
985 inline bool
986 ipcp_lattice<valtype>::set_contains_variable ()
988 bool ret = !contains_variable;
989 contains_variable = true;
990 return ret;
993 /* Set all aggregate lattices in PLATS to bottom and return true if they were
994 not previously set as such. */
996 static inline bool
997 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
999 bool ret = !plats->aggs_bottom;
1000 plats->aggs_bottom = true;
1001 return ret;
1004 /* Mark all aggregate lattices in PLATS as containing an unknown value and
1005 return true if they were not previously marked as such. */
1007 static inline bool
1008 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
1010 bool ret = !plats->aggs_contain_variable;
1011 plats->aggs_contain_variable = true;
1012 return ret;
1015 bool
1016 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
1018 return meet_with_1 (other.m_vr);
1021 /* Meet the current value of the lattice with the range described by
1022 P_VR. */
1024 bool
1025 ipcp_vr_lattice::meet_with (const vrange &p_vr)
1027 return meet_with_1 (p_vr);
1030 /* Meet the current value of the lattice with the range described by
1031 OTHER_VR. Return TRUE if anything changed. */
1033 bool
1034 ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
1036 if (bottom_p ())
1037 return false;
1039 if (other_vr.varying_p ())
1040 return set_to_bottom ();
1042 bool res;
1043 if (flag_checking)
1045 Value_Range save (m_vr);
1046 res = m_vr.union_ (other_vr);
1047 gcc_assert (res == (m_vr != save));
1049 else
1050 res = m_vr.union_ (other_vr);
1051 return res;
1054 /* Return true if value range information in the lattice is yet unknown. */
1056 bool
1057 ipcp_vr_lattice::top_p () const
1059 return m_vr.undefined_p ();
1062 /* Return true if value range information in the lattice is known to be
1063 unusable. */
1065 bool
1066 ipcp_vr_lattice::bottom_p () const
1068 return m_vr.varying_p ();
1071 /* Set value range information in the lattice to bottom. Return true if it
1072 previously was in a different state. */
1074 bool
1075 ipcp_vr_lattice::set_to_bottom ()
1077 if (m_vr.varying_p ())
1078 return false;
1080 /* Setting an unsupported type here forces the temporary to default
1081 	 to unsupported_range, which can handle VARYING/UNDEFINED ranges,
1082 but nothing else (union, intersect, etc). This allows us to set
1083 bottoms on any ranges, and is safe as all users of the lattice
1084 check for bottom first. */
1085 m_vr.set_type (void_type_node);
1086 m_vr.set_varying (void_type_node);
1088 return true;
1091 	/* Set lattice value to bottom if it is not bottom already. */
1093 bool
1094 ipcp_bits_lattice::set_to_bottom ()
1096 if (bottom_p ())
1097 return false;
1098 m_lattice_val = IPA_BITS_VARYING;
1099 m_value = 0;
1100 m_mask = -1;
1101 return true;
1104 /* Set to constant if it isn't already. Only meant to be called
1105 when switching state from TOP. */
1107 bool
1108 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1110 gcc_assert (top_p ());
1111 m_lattice_val = IPA_BITS_CONSTANT;
1112 m_value = wi::bit_and (wi::bit_not (mask), value);
1113 m_mask = mask;
1114 return true;
1117 /* Return true if any of the known bits are non-zero. */
1119 bool
1120 ipcp_bits_lattice::known_nonzero_p () const
1122 if (!constant_p ())
1123 return false;
1124 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
1127 /* Convert operand to value, mask form. */
1129 void
1130 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1132 wide_int get_nonzero_bits (const_tree);
1134 if (TREE_CODE (operand) == INTEGER_CST)
1136 *valuep = wi::to_widest (operand);
1137 *maskp = 0;
1139 else
1141 *valuep = 0;
1142 *maskp = -1;
1146 	/* Meet operation, similar to ccp_lattice_meet: we xor the two values and
1147 	 wherever this->m_value and VALUE disagree at a bit position, that bit is
1148 	 dropped to varying. Return true if the mask changed.
1149 	 This function assumes that the lattice value is in CONSTANT state. If
1150 	 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
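/* A worked example with illustrative four-bit values: meeting a lattice
   holding <m_value = 0100, m_mask = 0011> with <value = 1100, mask = 0000>
   computes m_mask = (0011 | 0000) | (0100 ^ 1100) = 1011 and then
   m_value = 0100 & ~1011 = 0100; that is, bit 2 stays known to be one while
   bit 3, on which the two values disagree, is dropped to varying.  */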
1152 bool
1153 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1154 unsigned precision, bool drop_all_ones)
1156 gcc_assert (constant_p ());
1158 widest_int old_mask = m_mask;
1159 m_mask = (m_mask | mask) | (m_value ^ value);
1160 if (drop_all_ones)
1161 m_mask |= m_value;
1162 m_value &= ~m_mask;
1164 if (wi::sext (m_mask, precision) == -1)
1165 return set_to_bottom ();
1167 return m_mask != old_mask;
1170 	/* Meet the bits lattice with the operand
1171 	 described by <value, mask, precision>. */
1173 bool
1174 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1175 unsigned precision)
1177 if (bottom_p ())
1178 return false;
1180 if (top_p ())
1182 if (wi::sext (mask, precision) == -1)
1183 return set_to_bottom ();
1184 return set_to_constant (value, mask);
1187 return meet_with_1 (value, mask, precision, false);
1190 	/* Meet the bits lattice with the result of bit_value_binop (other, operand)
1191 	 if CODE is a binary operation, or of bit_value_unop (other) if CODE is a
1192 	 unary one. When CODE is NOP_EXPR, no adjustment is required. If
1193 	 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1195 bool
1196 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1197 signop sgn, enum tree_code code, tree operand,
1198 bool drop_all_ones)
1200 if (other.bottom_p ())
1201 return set_to_bottom ();
1203 if (bottom_p () || other.top_p ())
1204 return false;
1206 widest_int adjusted_value, adjusted_mask;
1208 if (TREE_CODE_CLASS (code) == tcc_binary)
1210 tree type = TREE_TYPE (operand);
1211 widest_int o_value, o_mask;
1212 get_value_and_mask (operand, &o_value, &o_mask);
1214 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1215 sgn, precision, other.get_value (), other.get_mask (),
1216 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1218 if (wi::sext (adjusted_mask, precision) == -1)
1219 return set_to_bottom ();
1222 else if (TREE_CODE_CLASS (code) == tcc_unary)
1224 bit_value_unop (code, sgn, precision, &adjusted_value,
1225 &adjusted_mask, sgn, precision, other.get_value (),
1226 other.get_mask ());
1228 if (wi::sext (adjusted_mask, precision) == -1)
1229 return set_to_bottom ();
1232 else
1233 return set_to_bottom ();
1235 if (top_p ())
1237 if (drop_all_ones)
1239 adjusted_mask |= adjusted_value;
1240 adjusted_value &= ~adjusted_mask;
1242 if (wi::sext (adjusted_mask, precision) == -1)
1243 return set_to_bottom ();
1244 return set_to_constant (adjusted_value, adjusted_mask);
1246 else
1247 return meet_with_1 (adjusted_value, adjusted_mask, precision,
1248 drop_all_ones);
1251 /* Dump the contents of the list to FILE. */
1253 void
1254 ipa_argagg_value_list::dump (FILE *f)
1256 bool comma = false;
1257 for (const ipa_argagg_value &av : m_elts)
1259 fprintf (f, "%s %i[%u]=", comma ? "," : "",
1260 av.index, av.unit_offset);
1261 print_generic_expr (f, av.value);
1262 if (av.by_ref)
1263 fprintf (f, "(by_ref)");
1264 if (av.killed)
1265 fprintf (f, "(killed)");
1266 comma = true;
1268 fprintf (f, "\n");
1271 /* Dump the contents of the list to stderr. */
1273 void
1274 ipa_argagg_value_list::debug ()
1276 dump (stderr);
1279 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1280 NULL if there is no such constant. */
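/* The binary search below relies on m_elts being sorted by (index,
   unit_offset) lexicographically, e.g. an order such as {0, 0}, {0, 8},
   {2, 0}, {2, 4}; the flag_checking block at the end of the function
   verifies exactly this assumption.  */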
1282 const ipa_argagg_value *
1283 ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1285 ipa_argagg_value key;
1286 key.index = index;
1287 key.unit_offset = unit_offset;
1288 const ipa_argagg_value *res
1289 = std::lower_bound (m_elts.begin (), m_elts.end (), key,
1290 [] (const ipa_argagg_value &elt,
1291 const ipa_argagg_value &val)
1293 if (elt.index < val.index)
1294 return true;
1295 if (elt.index > val.index)
1296 return false;
1297 if (elt.unit_offset < val.unit_offset)
1298 return true;
1299 return false;
1302 if (res == m_elts.end ()
1303 || res->index != index
1304 || res->unit_offset != unit_offset)
1305 res = nullptr;
1307 /* TODO: perhaps remove the check (that the underlying array is indeed
1308 sorted) if it turns out it can be too slow? */
1309 if (!flag_checking)
1310 return res;
1312 const ipa_argagg_value *slow_res = NULL;
1313 int prev_index = -1;
1314 unsigned prev_unit_offset = 0;
1315 for (const ipa_argagg_value &av : m_elts)
1317 gcc_assert (prev_index < 0
1318 || prev_index < av.index
1319 || prev_unit_offset < av.unit_offset);
1320 prev_index = av.index;
1321 prev_unit_offset = av.unit_offset;
1322 if (av.index == index
1323 && av.unit_offset == unit_offset)
1324 slow_res = &av;
1326 gcc_assert (res == slow_res);
1328 return res;
1331 /* Return the first item describing a constant stored for parameter with INDEX,
1332 regardless of offset or reference, or NULL if there is no such constant. */
1334 const ipa_argagg_value *
1335 ipa_argagg_value_list::get_elt_for_index (int index) const
1337 const ipa_argagg_value *res
1338 = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1339 [] (const ipa_argagg_value &elt, unsigned idx)
1341 return elt.index < idx;
1343 if (res == m_elts.end ()
1344 || res->index != index)
1345 res = nullptr;
1346 return res;
1349 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1350 performing any check of whether value is passed by reference, or NULL_TREE
1351 if there is no such constant. */
1353 tree
1354 ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1356 const ipa_argagg_value *av = get_elt (index, unit_offset);
1357 return av ? av->value : NULL_TREE;
1360 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1361 passed by reference or not according to BY_REF, or NULL_TREE if there is
1362 no such constant. */
1364 tree
1365 ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1366 bool by_ref) const
1368 const ipa_argagg_value *av = get_elt (index, unit_offset);
1369 if (av && av->by_ref == by_ref)
1370 return av->value;
1371 return NULL_TREE;
1374 /* Return true if all elements present in OTHER are also present in this
1375 list. */
1377 bool
1378 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1380 unsigned j = 0;
1381 for (unsigned i = 0; i < other.m_elts.size (); i++)
1383 unsigned other_index = other.m_elts[i].index;
1384 unsigned other_offset = other.m_elts[i].unit_offset;
1386 while (j < m_elts.size ()
1387 && (m_elts[j].index < other_index
1388 || (m_elts[j].index == other_index
1389 && m_elts[j].unit_offset < other_offset)))
1390 j++;
1392 if (j >= m_elts.size ()
1393 || m_elts[j].index != other_index
1394 || m_elts[j].unit_offset != other_offset
1395 || m_elts[j].by_ref != other.m_elts[i].by_ref
1396 || !m_elts[j].value
1397 || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
1398 return false;
1400 return true;
1403 	/* Push all items in this list that describe parameter SRC_INDEX into RES as
1404 	 ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
1405 	 offsets but skip those which would end up with a negative offset. */
1407 void
1408 ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1409 unsigned dest_index,
1410 unsigned unit_delta,
1411 vec<ipa_argagg_value> *res) const
1413 const ipa_argagg_value *av = get_elt_for_index (src_index);
1414 if (!av)
1415 return;
1416 unsigned prev_unit_offset = 0;
1417 bool first = true;
1418 for (; av < m_elts.end (); ++av)
1420 if (av->index > src_index)
1421 return;
1422 if (av->index == src_index
1423 && (av->unit_offset >= unit_delta)
1424 && av->value)
1426 ipa_argagg_value new_av;
1427 gcc_checking_assert (av->value);
1428 new_av.value = av->value;
1429 new_av.unit_offset = av->unit_offset - unit_delta;
1430 new_av.index = dest_index;
1431 new_av.by_ref = av->by_ref;
1432 gcc_assert (!av->killed);
1433 new_av.killed = false;
1435 /* Quick check that the offsets we push are indeed increasing. */
1436 gcc_assert (first
1437 || new_av.unit_offset > prev_unit_offset);
1438 prev_unit_offset = new_av.unit_offset;
1439 first = false;
1441 res->safe_push (new_av);
1446 /* Push to RES information about single lattices describing aggregate values in
1447 PLATS as those describing parameter DEST_INDEX and the original offset minus
1448 UNIT_DELTA. Return true if any item has been pushed to RES. */
1450 static bool
1451 push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1452 unsigned unit_delta,
1453 vec<ipa_argagg_value> *res)
1455 if (plats->aggs_contain_variable)
1456 return false;
1458 bool pushed_sth = false;
1459 bool first = true;
1460 unsigned prev_unit_offset = 0;
1461 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1462 if (aglat->is_single_const ()
1463 && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1465 ipa_argagg_value iav;
1466 iav.value = aglat->values->value;
1467 iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1468 iav.index = dest_index;
1469 iav.by_ref = plats->aggs_by_ref;
1470 iav.killed = false;
1472 gcc_assert (first
1473 || iav.unit_offset > prev_unit_offset);
1474 prev_unit_offset = iav.unit_offset;
1475 first = false;
1477 pushed_sth = true;
1478 res->safe_push (iav);
1480 return pushed_sth;
1483 	/* Turn all values in ELTS that are not also present in OTHER into NULL_TREEs.
1484 	 Return the number of remaining valid entries. */
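/* For example (entries abbreviated to index[offset]=value): intersecting
   ELTS = { 0[0]=5, 1[8]=7 } with OTHER = { 0[0]=5, 1[8]=9 } keeps 0[0]=5,
   turns the value of 1[8] into NULL_TREE because the constants differ, and
   returns 1.  */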
1486 static unsigned
1487 intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1488 const vec<ipa_argagg_value> &other)
1490 unsigned valid_entries = 0;
1491 unsigned j = 0;
1492 for (unsigned i = 0; i < elts.length (); i++)
1494 if (!elts[i].value)
1495 continue;
1497 unsigned this_index = elts[i].index;
1498 unsigned this_offset = elts[i].unit_offset;
1500 while (j < other.length ()
1501 && (other[j].index < this_index
1502 || (other[j].index == this_index
1503 && other[j].unit_offset < this_offset)))
1504 j++;
1506 if (j >= other.length ())
1508 elts[i].value = NULL_TREE;
1509 continue;
1512 if (other[j].index == this_index
1513 && other[j].unit_offset == this_offset
1514 && other[j].by_ref == elts[i].by_ref
1515 && other[j].value
1516 && values_equal_for_ipcp_p (other[j].value, elts[i].value))
1517 valid_entries++;
1518 else
1519 elts[i].value = NULL_TREE;
1521 return valid_entries;
1524 	/* Mark both aggregate and scalar lattices as containing an unknown variable
1525 	 and return true if any of them has not been marked as such so far. */
1527 static inline bool
1528 set_all_contains_variable (class ipcp_param_lattices *plats)
1530 bool ret;
1531 ret = plats->itself.set_contains_variable ();
1532 ret |= plats->ctxlat.set_contains_variable ();
1533 ret |= set_agg_lats_contain_variable (plats);
1534 ret |= plats->bits_lattice.set_to_bottom ();
1535 ret |= plats->m_value_range.set_to_bottom ();
1536 return ret;
1539 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1540 points to by the number of callers to NODE. */
1542 static bool
1543 count_callers (cgraph_node *node, void *data)
1545 int *caller_count = (int *) data;
1547 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1548 /* Local thunks can be handled transparently, but if the thunk cannot
1549 be optimized out, count it as a real use. */
1550 if (!cs->caller->thunk || !cs->caller->local)
1551 ++*caller_count;
1552 return false;
1555 	/* Worker of call_for_symbol_thunks_and_aliases. It is supposed to be called
1556 	 on the single caller of some other node. Set the caller's corresponding flag. */
1558 static bool
1559 set_single_call_flag (cgraph_node *node, void *)
1561 cgraph_edge *cs = node->callers;
1562 /* Local thunks can be handled transparently, skip them. */
1563 while (cs && cs->caller->thunk && cs->caller->local)
1564 cs = cs->next_caller;
1565 if (cs)
1566 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1568 info->node_calling_single_call = true;
1569 return true;
1571 return false;
1574 /* Initialize ipcp_lattices. */
1576 static void
1577 initialize_node_lattices (struct cgraph_node *node)
1579 ipa_node_params *info = ipa_node_params_sum->get (node);
1580 struct cgraph_edge *ie;
1581 bool disable = false, variable = false;
1582 int i;
1584 gcc_checking_assert (node->has_gimple_body_p ());
1586 if (!ipa_get_param_count (info))
1587 disable = true;
1588 else if (node->local)
1590 int caller_count = 0;
1591 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1592 true);
1593 gcc_checking_assert (caller_count > 0);
1594 if (caller_count == 1)
1595 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1596 NULL, true);
1598 else
1600 	 /* When cloning is allowed, we can assume that externally visible
1601 	 functions are not called. We will compensate for this by cloning
1602 	 later. */
1603 if (ipcp_versionable_function_p (node)
1604 && ipcp_cloning_candidate_p (node))
1605 variable = true;
1606 else
1607 disable = true;
1610 if (dump_file && (dump_flags & TDF_DETAILS)
1611 && !node->alias && !node->thunk)
1613 fprintf (dump_file, "Initializing lattices of %s\n",
1614 node->dump_name ());
1615 if (disable || variable)
1616 fprintf (dump_file, " Marking all lattices as %s\n",
1617 disable ? "BOTTOM" : "VARIABLE");
1620 auto_vec<bool, 16> surviving_params;
1621 bool pre_modified = false;
1623 clone_info *cinfo = clone_info::get (node);
1625 if (!disable && cinfo && cinfo->param_adjustments)
1627 /* At the moment all IPA optimizations should use the number of
1628 parameters of the prevailing decl as the m_always_copy_start.
1629 Handling any other value would complicate the code below, so for the
1630 	 time being let's only assert it is so. */
1631 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1632 == ipa_get_param_count (info))
1633 || cinfo->param_adjustments->m_always_copy_start < 0);
1635 pre_modified = true;
1636 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1638 if (dump_file && (dump_flags & TDF_DETAILS)
1639 && !node->alias && !node->thunk)
1641 bool first = true;
1642 for (int j = 0; j < ipa_get_param_count (info); j++)
1644 if (j < (int) surviving_params.length ()
1645 && surviving_params[j])
1646 continue;
1647 if (first)
1649 fprintf (dump_file,
1650 " The following parameters are dead on arrival:");
1651 first = false;
1653 fprintf (dump_file, " %u", j);
1655 if (!first)
1656 fprintf (dump_file, "\n");
1660 for (i = 0; i < ipa_get_param_count (info); i++)
1662 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1663 tree type = ipa_get_type (info, i);
1664 if (disable
1665 || !ipa_get_type (info, i)
1666 || (pre_modified && (surviving_params.length () <= (unsigned) i
1667 || !surviving_params[i])))
1669 plats->itself.set_to_bottom ();
1670 plats->ctxlat.set_to_bottom ();
1671 set_agg_lats_to_bottom (plats);
1672 plats->bits_lattice.set_to_bottom ();
1673 plats->m_value_range.init (type);
1674 plats->m_value_range.set_to_bottom ();
1676 else
1678 plats->m_value_range.init (type);
1679 if (variable)
1680 set_all_contains_variable (plats);
1684 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1685 if (ie->indirect_info->polymorphic
1686 && ie->indirect_info->param_index >= 0)
1688 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1689 ipa_get_parm_lattices (info,
1690 ie->indirect_info->param_index)->virt_call = 1;
1694 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1695 PARAM_TYPE. */
1697 static bool
1698 ipacp_value_safe_for_type (tree param_type, tree value)
1700 tree val_type = TREE_TYPE (value);
1701 if (param_type == val_type
1702 || useless_type_conversion_p (param_type, val_type)
1703 || fold_convertible_p (param_type, value))
1704 return true;
1705 else
1706 return false;
1709 /* Return the result of a (possibly arithmetic) operation on the constant
1710 	 value INPUT. OPERAND is the second operand for a binary operation. RES_TYPE is
1711 the type of the parameter to which the result is passed. Return
1712 NULL_TREE if that cannot be determined or be considered an
1713 interprocedural invariant. */
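/* For instance, for a jump function describing an argument passed as
   "param + 4" (OPCODE PLUS_EXPR, OPERAND 4) and INPUT 3, the folding below
   yields the invariant 7, whereas an INPUT which is not a GIMPLE IP
   invariant (say, the address of an automatic variable) makes the function
   return NULL_TREE straight away.  */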
1715 static tree
1716 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1717 tree res_type)
1719 tree res;
1721 if (opcode == NOP_EXPR)
1722 return input;
1723 if (!is_gimple_ip_invariant (input))
1724 return NULL_TREE;
1726 if (opcode == ASSERT_EXPR)
1728 if (values_equal_for_ipcp_p (input, operand))
1729 return input;
1730 else
1731 return NULL_TREE;
1734 if (!res_type)
1736 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1737 res_type = boolean_type_node;
1738 else if (expr_type_first_operand_type_p (opcode))
1739 res_type = TREE_TYPE (input);
1740 else
1741 return NULL_TREE;
1744 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1745 res = fold_unary (opcode, res_type, input);
1746 else
1747 res = fold_binary (opcode, res_type, input, operand);
1749 if (res && !is_gimple_ip_invariant (res))
1750 return NULL_TREE;
1752 return res;
1755 /* Return the result of a (possibly arithmetic) pass through jump function
1756 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1757 to which the result is passed. Return NULL_TREE if that cannot be
1758 determined or be considered an interprocedural invariant. */
1760 static tree
1761 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1762 tree res_type)
1764 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1765 input,
1766 ipa_get_jf_pass_through_operand (jfunc),
1767 res_type);
1770 /* Return the result of an ancestor jump function JFUNC on the constant value
1771 INPUT. Return NULL_TREE if that cannot be determined. */
1773 static tree
1774 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1776 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1777 if (TREE_CODE (input) == ADDR_EXPR)
1779 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1780 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1781 if (known_eq (off, 0))
1782 return input;
1783 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1784 return build1 (ADDR_EXPR, TREE_TYPE (input),
1785 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1786 build_int_cst (ptr_type_node, byte_offset)));
1788 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1789 && zerop (input))
1790 return input;
1791 else
1792 return NULL_TREE;
1795 /* Determine whether JFUNC evaluates to a single known constant value and if
1796 so, return it. Otherwise return NULL. INFO describes the caller node or
1797 the one it is inlined to, so that pass-through jump functions can be
1798 evaluated. PARM_TYPE is the type of the parameter to which the result is
1799 passed. */
1801 tree
1802 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1803 tree parm_type)
1805 if (jfunc->type == IPA_JF_CONST)
1806 return ipa_get_jf_constant (jfunc);
1807 else if (jfunc->type == IPA_JF_PASS_THROUGH
1808 || jfunc->type == IPA_JF_ANCESTOR)
1810 tree input;
1811 int idx;
1813 if (jfunc->type == IPA_JF_PASS_THROUGH)
1814 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1815 else
1816 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1818 if (info->ipcp_orig_node)
1819 input = info->known_csts[idx];
1820 else
1822 ipcp_lattice<tree> *lat;
1824 if (!info->lattices
1825 || idx >= ipa_get_param_count (info))
1826 return NULL_TREE;
1827 lat = ipa_get_scalar_lat (info, idx);
1828 if (!lat->is_single_const ())
1829 return NULL_TREE;
1830 input = lat->values->value;
1833 if (!input)
1834 return NULL_TREE;
1836 if (jfunc->type == IPA_JF_PASS_THROUGH)
1837 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1838 else
1839 return ipa_get_jf_ancestor_result (jfunc, input);
1841 else
1842 return NULL_TREE;
1845 	/* Determine whether JFUNC evaluates to a single known polymorphic context,
1846 	 given that INFO describes the caller node or the one it is inlined to, CS is
1847 	 the call graph edge corresponding to JFUNC and CSIDX is the index of the
1848 	 described parameter. */
1850 ipa_polymorphic_call_context
1851 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1852 ipa_jump_func *jfunc)
1854 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1855 ipa_polymorphic_call_context ctx;
1856 ipa_polymorphic_call_context *edge_ctx
1857 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1859 if (edge_ctx && !edge_ctx->useless_p ())
1860 ctx = *edge_ctx;
1862 if (jfunc->type == IPA_JF_PASS_THROUGH
1863 || jfunc->type == IPA_JF_ANCESTOR)
1865 ipa_polymorphic_call_context srcctx;
1866 int srcidx;
1867 bool type_preserved = true;
1868 if (jfunc->type == IPA_JF_PASS_THROUGH)
1870 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1871 return ctx;
1872 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1873 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1875 else
1877 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1878 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1880 if (info->ipcp_orig_node)
1882 if (info->known_contexts.exists ())
1883 srcctx = info->known_contexts[srcidx];
1885 else
1887 if (!info->lattices
1888 || srcidx >= ipa_get_param_count (info))
1889 return ctx;
1890 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1891 lat = ipa_get_poly_ctx_lat (info, srcidx);
1892 if (!lat->is_single_const ())
1893 return ctx;
1894 srcctx = lat->values->value;
1896 if (srcctx.useless_p ())
1897 return ctx;
1898 if (jfunc->type == IPA_JF_ANCESTOR)
1899 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1900 if (!type_preserved)
1901 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1902 srcctx.combine_with (ctx);
1903 return srcctx;
1906 return ctx;
1909 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1910 DST_TYPE on the value range in SRC_VR and store it into DST_VR. Return true if
1911 the result is a range that is neither VARYING nor UNDEFINED. */
1913 static bool
1914 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1915 const vrange &src_vr,
1916 enum tree_code operation,
1917 tree dst_type, tree src_type)
1919 if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
1920 return false;
1922 range_op_handler handler (operation);
1923 if (!handler)
1924 return false;
1926 Value_Range varying (dst_type);
1927 varying.set_varying (dst_type);
1929 return (handler.operand_check_p (dst_type, src_type, dst_type)
1930 && handler.fold_range (dst_vr, dst_type, src_vr, varying)
1931 && !dst_vr.varying_p ()
1932 && !dst_vr.undefined_p ());
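/* For instance (an illustrative sketch, not from the original sources):
   folding a NOP_EXPR conversion of the unsigned char range [10, 250] to a
   32-bit int yields [10, 250] and the function returns true, whereas a
   source range that folds to VARYING in the destination type makes it
   return false.  */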
1935 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1936 first be extracted into a vrange. */
1938 static bool
1939 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1940 const ipa_vr &src_vr,
1941 enum tree_code operation,
1942 tree dst_type, tree src_type)
1944 Value_Range tmp;
1945 src_vr.get_vrange (tmp);
1946 return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1947 dst_type, src_type);
1950 /* Determine range of JFUNC given that INFO describes the caller node or
1951 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1952 and PARM_TYPE is the type of the parameter. */
1954 void
1955 ipa_value_range_from_jfunc (vrange &vr,
1956 ipa_node_params *info, cgraph_edge *cs,
1957 ipa_jump_func *jfunc, tree parm_type)
1959 vr.set_undefined ();
1961 if (jfunc->m_vr)
1962 ipa_vr_operation_and_type_effects (vr,
1963 *jfunc->m_vr,
1964 NOP_EXPR, parm_type,
1965 jfunc->m_vr->type ());
1966 if (vr.singleton_p ())
1967 return;
1968 if (jfunc->type == IPA_JF_PASS_THROUGH)
1970 int idx;
1971 ipcp_transformation *sum
1972 = ipcp_get_transformation_summary (cs->caller->inlined_to
1973 ? cs->caller->inlined_to
1974 : cs->caller);
1975 if (!sum || !sum->m_vr)
1976 return;
1978 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1980 if (!(*sum->m_vr)[idx].known_p ())
1981 return;
1982 tree vr_type = ipa_get_type (info, idx);
1983 Value_Range srcvr;
1984 (*sum->m_vr)[idx].get_vrange (srcvr);
1986 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1988 if (TREE_CODE_CLASS (operation) == tcc_unary)
1990 Value_Range res (vr_type);
1992 if (ipa_vr_operation_and_type_effects (res,
1993 srcvr,
1994 operation, parm_type,
1995 vr_type))
1996 vr.intersect (res);
1998 else
2000 Value_Range op_res (vr_type);
2001 Value_Range res (vr_type);
2002 tree op = ipa_get_jf_pass_through_operand (jfunc);
2003 Value_Range op_vr (vr_type);
2004 range_op_handler handler (operation);
2006 ipa_range_set_and_normalize (op_vr, op);
2008 if (!handler
2009 || !op_res.supports_type_p (vr_type)
2010 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
2011 op_res.set_varying (vr_type);
2013 if (ipa_vr_operation_and_type_effects (res,
2014 op_res,
2015 NOP_EXPR, parm_type,
2016 vr_type))
2017 vr.intersect (res);
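/* A worked example of the binary-operation branch above, with assumed
   ranges: if the caller's transformation summary records [0, 15] for the
   source parameter and the pass-through applies PLUS_EXPR with constant
   operand 1, fold_range produces [1, 16], which is then converted to
   PARM_TYPE and intersected into VR.  */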
2022 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
2023 single known constant value and if so, return it. Otherwise return NULL.
2024 NODE and INFO describe the caller node or the one it is inlined to, and
2025 its related info. */
2027 tree
2028 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
2029 const ipa_agg_jf_item *item)
2031 tree value = NULL_TREE;
2032 int src_idx;
2034 if (item->offset < 0
2035 || item->jftype == IPA_JF_UNKNOWN
2036 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
2037 return NULL_TREE;
2039 if (item->jftype == IPA_JF_CONST)
2040 return item->value.constant;
2042 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2043 || item->jftype == IPA_JF_LOAD_AGG);
2045 src_idx = item->value.pass_through.formal_id;
2047 if (info->ipcp_orig_node)
2049 if (item->jftype == IPA_JF_PASS_THROUGH)
2050 value = info->known_csts[src_idx];
2051 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2053 ipa_argagg_value_list avl (ts);
2054 value = avl.get_value (src_idx,
2055 item->value.load_agg.offset / BITS_PER_UNIT,
2056 item->value.load_agg.by_ref);
2059 else if (info->lattices)
2061 class ipcp_param_lattices *src_plats
2062 = ipa_get_parm_lattices (info, src_idx);
2064 if (item->jftype == IPA_JF_PASS_THROUGH)
2066 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2068 if (!lat->is_single_const ())
2069 return NULL_TREE;
2071 value = lat->values->value;
2073 else if (src_plats->aggs
2074 && !src_plats->aggs_bottom
2075 && !src_plats->aggs_contain_variable
2076 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2078 struct ipcp_agg_lattice *aglat;
2080 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2082 if (aglat->offset > item->value.load_agg.offset)
2083 break;
2085 if (aglat->offset == item->value.load_agg.offset)
2087 if (aglat->is_single_const ())
2088 value = aglat->values->value;
2089 break;
2095 if (!value)
2096 return NULL_TREE;
2098 if (item->jftype == IPA_JF_LOAD_AGG)
2100 tree load_type = item->value.load_agg.type;
2101 tree value_type = TREE_TYPE (value);
2103 /* Ensure value type is compatible with load type. */
2104 if (!useless_type_conversion_p (load_type, value_type))
2105 return NULL_TREE;
2108 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2109 value,
2110 item->value.pass_through.operand,
2111 item->type);
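/* An illustrative walk-through with hypothetical values: for an
   IPA_JF_LOAD_AGG item loading from a by-reference aggregate of the source
   parameter, if the matching aggregate lattice holds the single constant 3
   and the item's operation is MULT_EXPR with operand 2, the function
   returns the constant 6; a lattice with several values yields NULL_TREE.  */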
2114 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2115 caller is inlined to) NODE, which is described by INFO, and push the results to
2116 RES as describing values passed in parameter DST_INDEX. */
2118 void
2119 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2120 ipa_agg_jump_function *agg_jfunc,
2121 unsigned dst_index,
2122 vec<ipa_argagg_value> *res)
2124 unsigned prev_unit_offset = 0;
2125 bool first = true;
2127 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2129 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2130 if (!value)
2131 continue;
2133 ipa_argagg_value iav;
2134 iav.value = value;
2135 iav.unit_offset = item.offset / BITS_PER_UNIT;
2136 iav.index = dst_index;
2137 iav.by_ref = agg_jfunc->by_ref;
2138 iav.killed = 0;
2140 gcc_assert (first
2141 || iav.unit_offset > prev_unit_offset);
2142 prev_unit_offset = iav.unit_offset;
2143 first = false;
2145 res->safe_push (iav);
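/* Note that the assertion above relies on the items of an aggregate jump
   function being sorted by increasing offset, so the ipa_argagg_value
   entries pushed to RES end up in strictly ascending unit_offset order.  */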
2149 /* If checking is enabled, verify that no lattice is in the TOP state, i.e.
2150 simultaneously not bottom, not containing a variable component and without
2151 any known value. */
2153 DEBUG_FUNCTION void
2154 ipcp_verify_propagated_values (void)
2156 struct cgraph_node *node;
2158 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2160 ipa_node_params *info = ipa_node_params_sum->get (node);
2161 if (!opt_for_fn (node->decl, flag_ipa_cp)
2162 || !opt_for_fn (node->decl, optimize))
2163 continue;
2164 int i, count = ipa_get_param_count (info);
2166 for (i = 0; i < count; i++)
2168 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2170 if (!lat->bottom
2171 && !lat->contains_variable
2172 && lat->values_count == 0)
2174 if (dump_file)
2176 symtab->dump (dump_file);
2177 fprintf (dump_file, "\nIPA lattices after constant "
2178 "propagation, before gcc_unreachable:\n");
2179 print_all_lattices (dump_file, true, false);
2182 gcc_unreachable ();
2188 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2190 static bool
2191 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2192 ipa_polymorphic_call_context y)
2194 return x.equal_to (y);
2198 /* Add a new value source to the value represented by THIS, marking that a
2199 value comes from edge CS and (if the underlying jump function is a
2200 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2201 parameter described by SRC_INDEX. OFFSET is negative if the source was the
2202 scalar value of the parameter itself, otherwise it is the offset within an aggregate. */
2204 template <typename valtype>
2205 void
2206 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2207 int src_idx, HOST_WIDE_INT offset)
2209 ipcp_value_source<valtype> *src;
2211 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2212 src->offset = offset;
2213 src->cs = cs;
2214 src->val = src_val;
2215 src->index = src_idx;
2217 src->next = sources;
2218 sources = src;
2221 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2222 CST and clear all other fields. */
2224 static ipcp_value<tree> *
2225 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2227 ipcp_value<tree> *val;
2229 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2230 val->value = cst;
2231 val->self_recursion_generated_level = same_lat_gen_level;
2232 return val;
2235 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2236 value to CTX and clear all other fields. */
2238 static ipcp_value<ipa_polymorphic_call_context> *
2239 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2240 unsigned same_lat_gen_level)
2242 ipcp_value<ipa_polymorphic_call_context> *val;
2244 val = new (ipcp_poly_ctx_values_pool.allocate ())
2245 ipcp_value<ipa_polymorphic_call_context>();
2246 val->value = ctx;
2247 val->self_recursion_generated_level = same_lat_gen_level;
2248 return val;
2251 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2252 SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
2253 meaning. OFFSET -1 means the source is scalar and not a part of an
2254 aggregate. If non-NULL, VAL_P records address of existing or newly added
2255 ipcp_value.
2257 If the value is generated for a self-recursive call as a result of an
2258 arithmetic pass-through jump-function acting on a value in the same lattice,
2259 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2260 zero. If it is non-zero, the PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2262 template <typename valtype>
2263 bool
2264 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2265 ipcp_value<valtype> *src_val,
2266 int src_idx, HOST_WIDE_INT offset,
2267 ipcp_value<valtype> **val_p,
2268 unsigned same_lat_gen_level)
2270 ipcp_value<valtype> *val, *last_val = NULL;
2272 if (val_p)
2273 *val_p = NULL;
2275 if (bottom)
2276 return false;
2278 for (val = values; val; last_val = val, val = val->next)
2279 if (values_equal_for_ipcp_p (val->value, newval))
2281 if (val_p)
2282 *val_p = val;
2284 if (val->self_recursion_generated_level < same_lat_gen_level)
2285 val->self_recursion_generated_level = same_lat_gen_level;
2287 if (ipa_edge_within_scc (cs))
2289 ipcp_value_source<valtype> *s;
2290 for (s = val->sources; s; s = s->next)
2291 if (s->cs == cs && s->val == src_val)
2292 break;
2293 if (s)
2294 return false;
2297 val->add_source (cs, src_val, src_idx, offset);
2298 return false;
2301 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2302 param_ipa_cp_value_list_size))
2304 /* We can only free sources, not the values themselves, because sources
2305 of other values in this SCC might point to them. */
2306 for (val = values; val; val = val->next)
2308 while (val->sources)
2310 ipcp_value_source<valtype> *src = val->sources;
2311 val->sources = src->next;
2312 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2315 values = NULL;
2316 return set_to_bottom ();
2319 values_count++;
2320 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2321 val->add_source (cs, src_val, src_idx, offset);
2322 val->next = NULL;
2324 /* Add the new value to the end of the value list, which can reduce the number
2325 of iterations of the propagation stage for a recursive function. */
2326 if (last_val)
2327 last_val->next = val;
2328 else
2329 values = val;
2331 if (val_p)
2332 *val_p = val;
2334 return true;
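/* An illustrative scenario with hypothetical numbers: with
   param_ipa_cp_value_list_size set to 8, the ninth distinct constant added
   to a lattice through the code above frees all value sources and drops the
   whole lattice to bottom, whereas values generated for self-recursive
   calls (non-zero SAME_LAT_GEN_LEVEL) are exempt from that limit.  */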
2337 /* A helper function that returns the result of the operation specified by
2338 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected type
2339 of the value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2340 acting as its second operand. If non-NULL, RES_TYPE is the expected type of
2341 the result. */
2343 static tree
2344 get_val_across_arith_op (enum tree_code opcode,
2345 tree opnd1_type,
2346 tree opnd2,
2347 ipcp_value<tree> *src_val,
2348 tree res_type)
2350 tree opnd1 = src_val->value;
2352 /* Skip source values that are incompatible with the specified type. */
2353 if (opnd1_type
2354 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2355 return NULL_TREE;
2357 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
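/* For example, with OPCODE being PLUS_EXPR, OPND2 being the constant 5 and
   SRC_VAL holding the constant 37, the helper above folds the expression to
   the constant 42 (assuming the types are compatible).  */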
2360 /* Propagate values through an arithmetic transformation described by a jump
2361 function associated with edge CS, taking values from SRC_LAT and putting
2362 them into DEST_LAT. OPND1_TYPE is the expected type of the values in SRC_LAT.
2363 OPND2 is a constant value if the transformation is a binary operation.
2364 SRC_OFFSET specifies the offset in an aggregate if SRC_LAT describes a lattice
2365 of a part of the aggregate. SRC_IDX is the index of the source parameter.
2366 RES_TYPE is the type of the result being propagated. Return true if
2367 DEST_LAT changed. */
2369 static bool
2370 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2371 enum tree_code opcode,
2372 tree opnd1_type,
2373 tree opnd2,
2374 ipcp_lattice<tree> *src_lat,
2375 ipcp_lattice<tree> *dest_lat,
2376 HOST_WIDE_INT src_offset,
2377 int src_idx,
2378 tree res_type)
2380 ipcp_value<tree> *src_val;
2381 bool ret = false;
2383 /* Due to circular dependencies, propagating within an SCC through an arithmetic
2384 transformation would create an infinite number of values. But for a
2385 self-feeding recursive function, we can allow propagation for a limited
2386 count, which enables a simple kind of recursive function versioning.
2387 In all other scenarios, we simply make the lattice bottom. */
2388 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2390 int i;
2392 int max_recursive_depth = opt_for_fn (cs->caller->decl,
2393 param_ipa_cp_max_recursive_depth);
2394 if (src_lat != dest_lat || max_recursive_depth < 1)
2395 return dest_lat->set_contains_variable ();
2397 /* No benefit if the probability of recursive execution is low. */
2398 if (cs->sreal_frequency () * 100
2399 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2400 param_ipa_cp_min_recursive_probability))
2401 return dest_lat->set_contains_variable ();
2403 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2405 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2407 /* We do not currently use self-recursively generated values as propagation
2408 sources. This is conservative, but it avoids an explosion of the
2409 lattice's value space, especially when one recursive function
2410 calls another recursive one. */
2411 if (src_val->self_recursion_generated_p ())
2413 ipcp_value_source<tree> *s;
2415 /* If the lattice has already been propagated for the call site,
2416 no need to do that again. */
2417 for (s = src_val->sources; s; s = s->next)
2418 if (s->cs == cs)
2419 return dest_lat->set_contains_variable ();
2421 else
2422 val_seeds.safe_push (src_val);
2425 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2427 /* Recursively generate lattice values with a limited count. */
2428 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2430 for (int j = 1; j < max_recursive_depth; j++)
2432 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2433 src_val, res_type);
2434 if (!cstval
2435 || !ipacp_value_safe_for_type (res_type, cstval))
2436 break;
2438 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2439 src_offset, &src_val, j);
2440 gcc_checking_assert (src_val);
2443 ret |= dest_lat->set_contains_variable ();
2445 else
2446 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2448 /* Likewise, we do not use self-recursively generated values as propagation
2449 sources here, otherwise the value space of a normal lattice could
2450 easily overflow. */
2451 if (src_val->self_recursion_generated_p ())
2453 ret |= dest_lat->set_contains_variable ();
2454 continue;
2457 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2458 src_val, res_type);
2459 if (cstval
2460 && ipacp_value_safe_for_type (res_type, cstval))
2461 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2462 src_offset);
2463 else
2464 ret |= dest_lat->set_contains_variable ();
2467 return ret;
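/* To illustrate the self-recursion handling above with a sketch (assumed
   source, not from the original testsuite):

     void f (int depth)
     {
       if (depth > 3)
         return;
       do_work (depth);
       f (depth + 1);
     }

   Here do_work is a stand-in for the function body.  Starting from a known
   value 0 for DEPTH, the loop bounded by param_ipa_cp_max_recursive_depth
   can generate the values 1, 2, 3 and so on for the same lattice, each
   marked with an increasing generation level, which later allows creating a
   chain of specialized clones.  */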
2470 /* Propagate values through a pass-through jump function JFUNC associated with
2471 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2472 is the index of the source parameter. PARM_TYPE is the type of the
2473 parameter to which the result is passed. */
2475 static bool
2476 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2477 ipcp_lattice<tree> *src_lat,
2478 ipcp_lattice<tree> *dest_lat, int src_idx,
2479 tree parm_type)
2481 return propagate_vals_across_arith_jfunc (cs,
2482 ipa_get_jf_pass_through_operation (jfunc),
2483 NULL_TREE,
2484 ipa_get_jf_pass_through_operand (jfunc),
2485 src_lat, dest_lat, -1, src_idx, parm_type);
2488 /* Propagate values through an ancestor jump function JFUNC associated with
2489 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2490 is the index of the source parameter. */
2492 static bool
2493 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2494 struct ipa_jump_func *jfunc,
2495 ipcp_lattice<tree> *src_lat,
2496 ipcp_lattice<tree> *dest_lat, int src_idx,
2497 tree param_type)
2499 ipcp_value<tree> *src_val;
2500 bool ret = false;
2502 if (ipa_edge_within_scc (cs))
2503 return dest_lat->set_contains_variable ();
2505 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2507 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2509 if (t && ipacp_value_safe_for_type (param_type, t))
2510 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2511 else
2512 ret |= dest_lat->set_contains_variable ();
2515 return ret;
2518 /* Propagate scalar values across jump function JFUNC that is associated with
2519 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2520 parameter to which the result is passed. */
2522 static bool
2523 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2524 struct ipa_jump_func *jfunc,
2525 ipcp_lattice<tree> *dest_lat,
2526 tree param_type)
2528 if (dest_lat->bottom)
2529 return false;
2531 if (jfunc->type == IPA_JF_CONST)
2533 tree val = ipa_get_jf_constant (jfunc);
2534 if (ipacp_value_safe_for_type (param_type, val))
2535 return dest_lat->add_value (val, cs, NULL, 0);
2536 else
2537 return dest_lat->set_contains_variable ();
2539 else if (jfunc->type == IPA_JF_PASS_THROUGH
2540 || jfunc->type == IPA_JF_ANCESTOR)
2542 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2543 ipcp_lattice<tree> *src_lat;
2544 int src_idx;
2545 bool ret;
2547 if (jfunc->type == IPA_JF_PASS_THROUGH)
2548 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2549 else
2550 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2552 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2553 if (src_lat->bottom)
2554 return dest_lat->set_contains_variable ();
2556 /* If we would need to clone the caller and cannot, do not propagate. */
2557 if (!ipcp_versionable_function_p (cs->caller)
2558 && (src_lat->contains_variable
2559 || (src_lat->values_count > 1)))
2560 return dest_lat->set_contains_variable ();
2562 if (jfunc->type == IPA_JF_PASS_THROUGH)
2563 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2564 dest_lat, src_idx,
2565 param_type);
2566 else
2567 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2568 src_idx, param_type);
2570 if (src_lat->contains_variable)
2571 ret |= dest_lat->set_contains_variable ();
2573 return ret;
2576 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2577 use them for indirect inlining); we should propagate them too. */
2578 return dest_lat->set_contains_variable ();
2581 /* Propagate polymorphic call contexts across jump function JFUNC that is
2582 associated with edge CS and describes argument IDX, and put them into DEST_LAT. */
2584 static bool
2585 propagate_context_across_jump_function (cgraph_edge *cs,
2586 ipa_jump_func *jfunc, int idx,
2587 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2589 if (dest_lat->bottom)
2590 return false;
2591 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2592 bool ret = false;
2593 bool added_sth = false;
2594 bool type_preserved = true;
2596 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2597 = ipa_get_ith_polymorhic_call_context (args, idx);
2599 if (edge_ctx_ptr)
2600 edge_ctx = *edge_ctx_ptr;
2602 if (jfunc->type == IPA_JF_PASS_THROUGH
2603 || jfunc->type == IPA_JF_ANCESTOR)
2605 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2606 int src_idx;
2607 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2609 /* TODO: Once we figure out how to propagate speculations, it will
2610 probably be a good idea to switch to speculation if type_preserved is
2611 not set instead of punting. */
2612 if (jfunc->type == IPA_JF_PASS_THROUGH)
2614 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2615 goto prop_fail;
2616 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2617 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2619 else
2621 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2622 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2625 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2626 /* If we would need to clone the caller and cannot, do not propagate. */
2627 if (!ipcp_versionable_function_p (cs->caller)
2628 && (src_lat->contains_variable
2629 || (src_lat->values_count > 1)))
2630 goto prop_fail;
2632 ipcp_value<ipa_polymorphic_call_context> *src_val;
2633 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2635 ipa_polymorphic_call_context cur = src_val->value;
2637 if (!type_preserved)
2638 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2639 if (jfunc->type == IPA_JF_ANCESTOR)
2640 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2641 /* TODO: In cases where we know how the context is going to be used,
2642 we can improve the result by passing proper OTR_TYPE. */
2643 cur.combine_with (edge_ctx);
2644 if (!cur.useless_p ())
2646 if (src_lat->contains_variable
2647 && !edge_ctx.equal_to (cur))
2648 ret |= dest_lat->set_contains_variable ();
2649 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2650 added_sth = true;
2655 prop_fail:
2656 if (!added_sth)
2658 if (!edge_ctx.useless_p ())
2659 ret |= dest_lat->add_value (edge_ctx, cs);
2660 else
2661 ret |= dest_lat->set_contains_variable ();
2664 return ret;
2667 /* Propagate bits across jump function JFUNC that is associated with
2668 edge CS and update DEST_LATTICE accordingly. */
2670 bool
2671 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2672 ipa_jump_func *jfunc,
2673 ipcp_bits_lattice *dest_lattice)
2675 if (dest_lattice->bottom_p ())
2676 return false;
2678 enum availability availability;
2679 cgraph_node *callee = cs->callee->function_symbol (&availability);
2680 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2681 tree parm_type = ipa_get_type (callee_info, idx);
2683 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2684 transform for these cases. Similarly, we can have bad type mismatches
2685 with LTO, avoid doing anything with those too. */
2686 if (!parm_type
2687 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2689 if (dump_file && (dump_flags & TDF_DETAILS))
2690 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2691 "param %i of %s is NULL or unsuitable for bits propagation\n",
2692 idx, cs->callee->dump_name ());
2694 return dest_lattice->set_to_bottom ();
2697 unsigned precision = TYPE_PRECISION (parm_type);
2698 signop sgn = TYPE_SIGN (parm_type);
2700 if (jfunc->type == IPA_JF_PASS_THROUGH
2701 || jfunc->type == IPA_JF_ANCESTOR)
2703 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2704 tree operand = NULL_TREE;
2705 enum tree_code code;
2706 unsigned src_idx;
2707 bool keep_null = false;
2709 if (jfunc->type == IPA_JF_PASS_THROUGH)
2711 code = ipa_get_jf_pass_through_operation (jfunc);
2712 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2713 if (code != NOP_EXPR)
2714 operand = ipa_get_jf_pass_through_operand (jfunc);
2716 else
2718 code = POINTER_PLUS_EXPR;
2719 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2720 unsigned HOST_WIDE_INT offset
2721 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2722 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2723 operand = build_int_cstu (size_type_node, offset);
2726 class ipcp_param_lattices *src_lats
2727 = ipa_get_parm_lattices (caller_info, src_idx);
2729 /* Try to propagate bits even if src_lattice is bottom, as long as jfunc is
2730 known. For example, consider:
2731 int f (int x)
2732 {
2733 g (x & 0xff);
2734 }
2735 Assume the lattice for x is bottom; we can nevertheless propagate the known
2736 bits of the result of x & 0xff, which get computed during the ccp1 pass,
2737 and we store them in the jump function during the analysis stage. */
2739 if (!src_lats->bits_lattice.bottom_p ())
2741 bool drop_all_ones
2742 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2744 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2745 sgn, code, operand, drop_all_ones);
2749 Value_Range vr (parm_type);
2750 if (jfunc->m_vr)
2752 jfunc->m_vr->get_vrange (vr);
2753 if (!vr.undefined_p () && !vr.varying_p ())
2755 irange &r = as_a <irange> (vr);
2756 irange_bitmask bm = r.get_bitmask ();
2757 widest_int mask
2758 = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2759 widest_int value
2760 = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2761 return dest_lattice->meet_with (value, mask, precision);
2764 return dest_lattice->set_to_bottom ();
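/* A hypothetical illustration of the ancestor case above: for an ancestor
   jump function with a 32-byte offset, the bits lattice of the source
   parameter is combined through POINTER_PLUS_EXPR with the constant 32, so
   alignment knowledge such as "the low five bits are zero" survives into
   the callee.  */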
2767 /* Propagate value range across jump function JFUNC that is associated with
2768 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2769 accordingly. */
2771 static bool
2772 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2773 class ipcp_param_lattices *dest_plats,
2774 tree param_type)
2776 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2778 if (dest_lat->bottom_p ())
2779 return false;
2781 if (!param_type
2782 || (!INTEGRAL_TYPE_P (param_type)
2783 && !POINTER_TYPE_P (param_type)))
2784 return dest_lat->set_to_bottom ();
2786 if (jfunc->type == IPA_JF_PASS_THROUGH)
2788 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2789 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2790 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2791 class ipcp_param_lattices *src_lats
2792 = ipa_get_parm_lattices (caller_info, src_idx);
2793 tree operand_type = ipa_get_type (caller_info, src_idx);
2795 if (src_lats->m_value_range.bottom_p ())
2796 return dest_lat->set_to_bottom ();
2798 Value_Range vr (operand_type);
2799 if (TREE_CODE_CLASS (operation) == tcc_unary)
2800 ipa_vr_operation_and_type_effects (vr,
2801 src_lats->m_value_range.m_vr,
2802 operation, param_type,
2803 operand_type);
2804 /* A crude way to prevent an unbounded number of value range updates
2805 in SCC components. We should allow a limited number of updates within
2806 an SCC, too. */
2807 else if (!ipa_edge_within_scc (cs))
2809 tree op = ipa_get_jf_pass_through_operand (jfunc);
2810 Value_Range op_vr (TREE_TYPE (op));
2811 Value_Range op_res (operand_type);
2812 range_op_handler handler (operation);
2814 ipa_range_set_and_normalize (op_vr, op);
2816 if (!handler
2817 || !op_res.supports_type_p (operand_type)
2818 || !handler.fold_range (op_res, operand_type,
2819 src_lats->m_value_range.m_vr, op_vr))
2820 op_res.set_varying (operand_type);
2822 ipa_vr_operation_and_type_effects (vr,
2823 op_res,
2824 NOP_EXPR, param_type,
2825 operand_type);
2827 if (!vr.undefined_p () && !vr.varying_p ())
2829 if (jfunc->m_vr)
2831 Value_Range jvr (param_type);
2832 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2833 NOP_EXPR,
2834 param_type,
2835 jfunc->m_vr->type ()))
2836 vr.intersect (jvr);
2838 return dest_lat->meet_with (vr);
2841 else if (jfunc->type == IPA_JF_CONST)
2843 tree val = ipa_get_jf_constant (jfunc);
2844 if (TREE_CODE (val) == INTEGER_CST)
2846 val = fold_convert (param_type, val);
2847 if (TREE_OVERFLOW_P (val))
2848 val = drop_tree_overflow (val);
2850 Value_Range tmpvr (val, val);
2851 return dest_lat->meet_with (tmpvr);
2855 Value_Range vr (param_type);
2856 if (jfunc->m_vr
2857 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2858 param_type,
2859 jfunc->m_vr->type ()))
2860 return dest_lat->meet_with (vr);
2861 else
2862 return dest_lat->set_to_bottom ();
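/* A worked example for the pass-through case above, with assumed ranges: if
   the caller's lattice for the source parameter holds [0, 100] and the jump
   function applies PLUS_EXPR with constant operand 5, fold_range produces
   [5, 105]; after conversion to PARAM_TYPE, the result is met with the
   destination value range lattice.  */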
2865 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2866 NEW_AGGS_BY_REF and if not, mark all aggs as bottom and return true (in all
2867 other cases, return false). If there are no aggregate items, set
2868 aggs_by_ref to NEW_AGGS_BY_REF. */
2870 static bool
2871 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2872 bool new_aggs_by_ref)
2874 if (dest_plats->aggs)
2876 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2878 set_agg_lats_to_bottom (dest_plats);
2879 return true;
2882 else
2883 dest_plats->aggs_by_ref = new_aggs_by_ref;
2884 return false;
2887 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***AGLAT is an
2888 already existing lattice for the given OFFSET and VAL_SIZE, marking all skipped
2889 lattices as containing a variable and checking for overlaps. If there is no
2890 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2891 its offset and size, set its contains_variable flag to PRE_EXISTING, and return
2892 true, unless there are too many already. If there are too many, return false.
2893 If there are overlaps, turn the whole DEST_PLATS to bottom and return false. If
2894 any skipped lattices were newly marked as containing a variable, set *CHANGE to
2895 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2897 static bool
2898 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2899 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2900 struct ipcp_agg_lattice ***aglat,
2901 bool pre_existing, bool *change, int max_agg_items)
2903 gcc_checking_assert (offset >= 0);
2905 while (**aglat && (**aglat)->offset < offset)
2907 if ((**aglat)->offset + (**aglat)->size > offset)
2909 set_agg_lats_to_bottom (dest_plats);
2910 return false;
2912 *change |= (**aglat)->set_contains_variable ();
2913 *aglat = &(**aglat)->next;
2916 if (**aglat && (**aglat)->offset == offset)
2918 if ((**aglat)->size != val_size)
2920 set_agg_lats_to_bottom (dest_plats);
2921 return false;
2923 gcc_assert (!(**aglat)->next
2924 || (**aglat)->next->offset >= offset + val_size);
2925 return true;
2927 else
2929 struct ipcp_agg_lattice *new_al;
2931 if (**aglat && (**aglat)->offset < offset + val_size)
2933 set_agg_lats_to_bottom (dest_plats);
2934 return false;
2936 if (dest_plats->aggs_count == max_agg_items)
2937 return false;
2938 dest_plats->aggs_count++;
2939 new_al = ipcp_agg_lattice_pool.allocate ();
2940 memset (new_al, 0, sizeof (*new_al));
2942 new_al->offset = offset;
2943 new_al->size = val_size;
2944 new_al->contains_variable = pre_existing;
2946 new_al->next = **aglat;
2947 **aglat = new_al;
2948 return true;
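/* An illustration of the walk above, with a hypothetical layout: inserting
   a 32-bit value at offset 32 between existing lattices at offsets 0 and 64
   marks the skipped lattice at offset 0 as containing a variable and links
   a new lattice in between them; a 64-bit value at offset 32, on the other
   hand, would overlap the lattice at offset 64 and drop all aggregate
   lattices of the parameter to bottom.  */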
2952 /* Mark AGLAT and all other aggregate lattices reachable via next pointers as
2953 containing an unknown value. Return true if any of them changed. */
2955 static bool
2956 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2958 bool ret = false;
2959 while (aglat)
2961 ret |= aglat->set_contains_variable ();
2962 aglat = aglat->next;
2964 return ret;
2967 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2968 OFFSET_DELTA. CS is the call graph edge and SRC_IDX is the index of the source
2969 parameter used for lattice value sources. Return true if DEST_PLATS changed
2970 in any way. */
2972 static bool
2973 merge_aggregate_lattices (struct cgraph_edge *cs,
2974 class ipcp_param_lattices *dest_plats,
2975 class ipcp_param_lattices *src_plats,
2976 int src_idx, HOST_WIDE_INT offset_delta)
2978 bool pre_existing = dest_plats->aggs != NULL;
2979 struct ipcp_agg_lattice **dst_aglat;
2980 bool ret = false;
2982 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2983 return true;
2984 if (src_plats->aggs_bottom)
2985 return set_agg_lats_contain_variable (dest_plats);
2986 if (src_plats->aggs_contain_variable)
2987 ret |= set_agg_lats_contain_variable (dest_plats);
2988 dst_aglat = &dest_plats->aggs;
2990 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2991 param_ipa_max_agg_items);
2992 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2993 src_aglat;
2994 src_aglat = src_aglat->next)
2996 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2998 if (new_offset < 0)
2999 continue;
3000 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
3001 &dst_aglat, pre_existing, &ret, max_agg_items))
3003 struct ipcp_agg_lattice *new_al = *dst_aglat;
3005 dst_aglat = &(*dst_aglat)->next;
3006 if (src_aglat->bottom)
3008 ret |= new_al->set_contains_variable ();
3009 continue;
3011 if (src_aglat->contains_variable)
3012 ret |= new_al->set_contains_variable ();
3013 for (ipcp_value<tree> *val = src_aglat->values;
3014 val;
3015 val = val->next)
3016 ret |= new_al->add_value (val->value, cs, val, src_idx,
3017 src_aglat->offset);
3019 else if (dest_plats->aggs_bottom)
3020 return true;
3022 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
3023 return ret;
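/* For instance, with hypothetical offsets: when an ancestor jump function
   adjusts an aggregate by an OFFSET_DELTA of 64 bits, a source lattice at
   offset 128 is merged into a destination lattice at offset 64, while
   source lattices whose adjusted offset would become negative are simply
   skipped by the loop above.  */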
3026 /* Determine whether there is anything to propagate from SRC_PLATS through a
3027 pass-through JFUNC and, if so, whether it conforms to the
3028 rules about propagating values passed by reference. */
3030 static bool
3031 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
3032 struct ipa_jump_func *jfunc)
3034 return src_plats->aggs
3035 && (!src_plats->aggs_by_ref
3036 || ipa_get_jf_pass_through_agg_preserved (jfunc));
3039 /* Propagate values through ITEM, jump function for a part of an aggregate,
3040 into corresponding aggregate lattice AGLAT. CS is the call graph edge
3041 associated with the jump function. Return true if AGLAT changed in any
3042 way. */
3044 static bool
3045 propagate_aggregate_lattice (struct cgraph_edge *cs,
3046 struct ipa_agg_jf_item *item,
3047 struct ipcp_agg_lattice *aglat)
3049 class ipa_node_params *caller_info;
3050 class ipcp_param_lattices *src_plats;
3051 struct ipcp_lattice<tree> *src_lat;
3052 HOST_WIDE_INT src_offset;
3053 int src_idx;
3054 tree load_type;
3055 bool ret;
3057 if (item->jftype == IPA_JF_CONST)
3059 tree value = item->value.constant;
3061 gcc_checking_assert (is_gimple_ip_invariant (value));
3062 return aglat->add_value (value, cs, NULL, 0);
3065 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
3066 || item->jftype == IPA_JF_LOAD_AGG);
3068 caller_info = ipa_node_params_sum->get (cs->caller);
3069 src_idx = item->value.pass_through.formal_id;
3070 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3072 if (item->jftype == IPA_JF_PASS_THROUGH)
3074 load_type = NULL_TREE;
3075 src_lat = &src_plats->itself;
3076 src_offset = -1;
3078 else
3080 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3081 struct ipcp_agg_lattice *src_aglat;
3083 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3084 if (src_aglat->offset >= load_offset)
3085 break;
3087 load_type = item->value.load_agg.type;
3088 if (!src_aglat
3089 || src_aglat->offset > load_offset
3090 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3091 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3092 return aglat->set_contains_variable ();
3094 src_lat = src_aglat;
3095 src_offset = load_offset;
3098 if (src_lat->bottom
3099 || (!ipcp_versionable_function_p (cs->caller)
3100 && !src_lat->is_single_const ()))
3101 return aglat->set_contains_variable ();
3103 ret = propagate_vals_across_arith_jfunc (cs,
3104 item->value.pass_through.operation,
3105 load_type,
3106 item->value.pass_through.operand,
3107 src_lat, aglat,
3108 src_offset,
3109 src_idx,
3110 item->type);
3112 if (src_lat->contains_variable)
3113 ret |= aglat->set_contains_variable ();
3115 return ret;
3118 /* Propagate aggregate values across jump function JFUNC that is associated
3119 with edge CS and put the values into DEST_PLATS. */
3121 static bool
3122 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3123 struct ipa_jump_func *jfunc,
3124 class ipcp_param_lattices *dest_plats)
3126 bool ret = false;
3128 if (dest_plats->aggs_bottom)
3129 return false;
3131 if (jfunc->type == IPA_JF_PASS_THROUGH
3132 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3134 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3135 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3136 class ipcp_param_lattices *src_plats;
3138 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3139 if (agg_pass_through_permissible_p (src_plats, jfunc))
3141 /* Currently we do not produce clobber aggregate jump
3142 functions; replace this with merging when we do. */
3143 gcc_assert (!jfunc->agg.items);
3144 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3145 src_idx, 0);
3146 return ret;
3149 else if (jfunc->type == IPA_JF_ANCESTOR
3150 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3152 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3153 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3154 class ipcp_param_lattices *src_plats;
3156 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3157 if (src_plats->aggs && src_plats->aggs_by_ref)
3159 /* Currently we do not produce clobber aggregate jump
3160 functions; replace this with merging when we do. */
3161 gcc_assert (!jfunc->agg.items);
3162 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3163 ipa_get_jf_ancestor_offset (jfunc));
3165 else if (!src_plats->aggs_by_ref)
3166 ret |= set_agg_lats_to_bottom (dest_plats);
3167 else
3168 ret |= set_agg_lats_contain_variable (dest_plats);
3169 return ret;
3172 if (jfunc->agg.items)
3174 bool pre_existing = dest_plats->aggs != NULL;
3175 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3176 struct ipa_agg_jf_item *item;
3177 int i;
3179 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3180 return true;
3182 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3183 param_ipa_max_agg_items);
3184 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3186 HOST_WIDE_INT val_size;
3188 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3189 continue;
3190 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3192 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3193 &aglat, pre_existing, &ret, max_agg_items))
3195 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3196 aglat = &(*aglat)->next;
3198 else if (dest_plats->aggs_bottom)
3199 return true;
3202 ret |= set_chain_of_aglats_contains_variable (*aglat);
3204 else
3205 ret |= set_agg_lats_contain_variable (dest_plats);
3207 return ret;
3210 /* Return true if on the way from CS->caller to the final (non-alias and
3211 non-thunk) destination, the call passes through a thunk. */
3213 static bool
3214 call_passes_through_thunk (cgraph_edge *cs)
3216 cgraph_node *alias_or_thunk = cs->callee;
3217 while (alias_or_thunk->alias)
3218 alias_or_thunk = alias_or_thunk->get_alias_target ();
3219 return alias_or_thunk->thunk;
3222 /* Propagate constants from the caller to the callee of CS. INFO describes the
3223 caller. */
3225 static bool
3226 propagate_constants_across_call (struct cgraph_edge *cs)
3228 class ipa_node_params *callee_info;
3229 enum availability availability;
3230 cgraph_node *callee;
3231 class ipa_edge_args *args;
3232 bool ret = false;
3233 int i, args_count, parms_count;
3235 callee = cs->callee->function_symbol (&availability);
3236 if (!callee->definition)
3237 return false;
3238 gcc_checking_assert (callee->has_gimple_body_p ());
3239 callee_info = ipa_node_params_sum->get (callee);
3240 if (!callee_info)
3241 return false;
3243 args = ipa_edge_args_sum->get (cs);
3244 parms_count = ipa_get_param_count (callee_info);
3245 if (parms_count == 0)
3246 return false;
3247 if (!args
3248 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3249 || !opt_for_fn (cs->caller->decl, optimize))
3251 for (i = 0; i < parms_count; i++)
3252 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3253 i));
3254 return ret;
3256 args_count = ipa_get_cs_argument_count (args);
3258 /* If this call goes through a thunk we must not propagate to the first (0th)
3259 parameter. However, we might need to uncover a thunk from below a series
3260 of aliases first. */
3261 if (call_passes_through_thunk (cs))
3263 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3264 0));
3265 i = 1;
3267 else
3268 i = 0;
3270 for (; (i < args_count) && (i < parms_count); i++)
3272 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3273 class ipcp_param_lattices *dest_plats;
3274 tree param_type = ipa_get_type (callee_info, i);
3276 dest_plats = ipa_get_parm_lattices (callee_info, i);
3277 if (availability == AVAIL_INTERPOSABLE)
3278 ret |= set_all_contains_variable (dest_plats);
3279 else
3281 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3282 &dest_plats->itself,
3283 param_type);
3284 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3285 &dest_plats->ctxlat);
3286 ret
3287 |= propagate_bits_across_jump_function (cs, i, jump_func,
3288 &dest_plats->bits_lattice);
3289 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3290 dest_plats);
3291 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3292 ret |= propagate_vr_across_jump_function (cs, jump_func,
3293 dest_plats, param_type);
3294 else
3295 ret |= dest_plats->m_value_range.set_to_bottom ();
3298 for (; i < parms_count; i++)
3299 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3301 return ret;
3304 /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
3305 KNOWN_CONTEXTS and known aggregate values in AVS, return
3306 the destination. The latter three can be empty. Store into *SPECULATIVE
3307 whether the discovered target is only a speculative guess. */
3309 static tree
3310 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3311 const vec<tree> &known_csts,
3312 const vec<ipa_polymorphic_call_context> &known_contexts,
3313 const ipa_argagg_value_list &avs,
3314 bool *speculative)
3316 int param_index = ie->indirect_info->param_index;
3317 HOST_WIDE_INT anc_offset;
3318 tree t = NULL;
3319 tree target = NULL;
3321 *speculative = false;
3323 if (param_index == -1)
3324 return NULL_TREE;
3326 if (!ie->indirect_info->polymorphic)
3328 tree t = NULL;
3330 if (ie->indirect_info->agg_contents)
3332 t = NULL;
3333 if ((unsigned) param_index < known_csts.length ()
3334 && known_csts[param_index])
3335 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3336 ie->indirect_info->offset,
3337 ie->indirect_info->by_ref);
3339 if (!t && ie->indirect_info->guaranteed_unmodified)
3340 t = avs.get_value (param_index,
3341 ie->indirect_info->offset / BITS_PER_UNIT,
3342 ie->indirect_info->by_ref);
3344 else if ((unsigned) param_index < known_csts.length ())
3345 t = known_csts[param_index];
3347 if (t
3348 && TREE_CODE (t) == ADDR_EXPR
3349 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3350 return TREE_OPERAND (t, 0);
3351 else
3352 return NULL_TREE;
3355 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3356 return NULL_TREE;
3358 gcc_assert (!ie->indirect_info->agg_contents);
3359 gcc_assert (!ie->indirect_info->by_ref);
3360 anc_offset = ie->indirect_info->offset;
3362 t = NULL;
3364 if ((unsigned) param_index < known_csts.length ()
3365 && known_csts[param_index])
3366 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3367 ie->indirect_info->offset, true);
3369 /* Try to work out the value of the virtual table pointer in replacements
3370 or known aggregate values. */
3371 if (!t)
3372 t = avs.get_value (param_index,
3373 ie->indirect_info->offset / BITS_PER_UNIT,
3374 true);
3376 /* If we found the virtual table pointer, lookup the target. */
3377 if (t)
3379 tree vtable;
3380 unsigned HOST_WIDE_INT offset;
3381 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3383 bool can_refer;
3384 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3385 vtable, offset, &can_refer);
3386 if (can_refer)
3388 if (!target
3389 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3390 || !possible_polymorphic_call_target_p
3391 (ie, cgraph_node::get (target)))
3393 /* Do not speculate builtin_unreachable, it is stupid! */
3394 if (ie->indirect_info->vptr_changed)
3395 return NULL;
3396 target = ipa_impossible_devirt_target (ie, target);
3398 *speculative = ie->indirect_info->vptr_changed;
3399 if (!*speculative)
3400 return target;
3405 /* Do we know the constant value of the pointer? */
3406 if (!t && (unsigned) param_index < known_csts.length ())
3407 t = known_csts[param_index];
3409 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3411 ipa_polymorphic_call_context context;
3412 if (known_contexts.length () > (unsigned int) param_index)
3414 context = known_contexts[param_index];
3415 context.offset_by (anc_offset);
3416 if (ie->indirect_info->vptr_changed)
3417 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3418 ie->indirect_info->otr_type);
3419 if (t)
3421 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3422 (t, ie->indirect_info->otr_type, anc_offset);
3423 if (!ctx2.useless_p ())
3424 context.combine_with (ctx2, ie->indirect_info->otr_type);
3427 else if (t)
3429 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3430 anc_offset);
3431 if (ie->indirect_info->vptr_changed)
3432 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3433 ie->indirect_info->otr_type);
3435 else
3436 return NULL_TREE;
3438 vec <cgraph_node *>targets;
3439 bool final;
3441 targets = possible_polymorphic_call_targets
3442 (ie->indirect_info->otr_type,
3443 ie->indirect_info->otr_token,
3444 context, &final);
3445 if (!final || targets.length () > 1)
3447 struct cgraph_node *node;
3448 if (*speculative)
3449 return target;
3450 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3451 || ie->speculative || !ie->maybe_hot_p ())
3452 return NULL;
3453 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3454 ie->indirect_info->otr_token,
3455 context);
3456 if (node)
3458 *speculative = true;
3459 target = node->decl;
3461 else
3462 return NULL;
3464 else
3466 *speculative = false;
3467 if (targets.length () == 1)
3468 target = targets[0]->decl;
3469 else
3470 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3473 if (target && !possible_polymorphic_call_target_p (ie,
3474 cgraph_node::get (target)))
3476 if (*speculative)
3477 return NULL;
3478 target = ipa_impossible_devirt_target (ie, target);
3481 return target;
3484 /* If an indirect edge IE can be turned into a direct one based on data in
3485 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3486 whether the discovered target is only a speculative guess. */
3488 tree
3489 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3490 ipa_call_arg_values *avals,
3491 bool *speculative)
3493 ipa_argagg_value_list avl (avals);
3494 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3495 avals->m_known_contexts,
3496 avl, speculative);
3499 /* Calculate devirtualization time bonus for NODE, assuming we know information
3500 about arguments stored in AVALS. */
3502 static int
3503 devirtualization_time_bonus (struct cgraph_node *node,
3504 ipa_auto_call_arg_values *avals)
3506 struct cgraph_edge *ie;
3507 int res = 0;
3509 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3511 struct cgraph_node *callee;
3512 class ipa_fn_summary *isummary;
3513 enum availability avail;
3514 tree target;
3515 bool speculative;
3517 ipa_argagg_value_list avl (avals);
3518 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3519 avals->m_known_contexts,
3520 avl, &speculative);
3521 if (!target)
3522 continue;
3524 /* Only bare minimum benefit for clearly un-inlineable targets. */
3525 res += 1;
3526 callee = cgraph_node::get (target);
3527 if (!callee || !callee->definition)
3528 continue;
3529 callee = callee->function_symbol (&avail);
3530 if (avail < AVAIL_AVAILABLE)
3531 continue;
3532 isummary = ipa_fn_summaries->get (callee);
3533 if (!isummary || !isummary->inlinable)
3534 continue;
3536 int size = ipa_size_summaries->get (callee)->size;
3537 /* FIXME: The values below need re-considering and perhaps also
3538 integrating into the cost metrics, at least in some very basic way. */
3539 int max_inline_insns_auto
3540 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3541 if (size <= max_inline_insns_auto / 4)
3542 res += 31 / ((int)speculative + 1);
3543 else if (size <= max_inline_insns_auto / 2)
3544 res += 15 / ((int)speculative + 1);
3545 else if (size <= max_inline_insns_auto
3546 || DECL_DECLARED_INLINE_P (callee->decl))
3547 res += 7 / ((int)speculative + 1);
3550 return res;
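/* To make the scoring above concrete, with assumed parameter values: if
   param_max_inline_insns_auto is 80, a non-speculatively devirtualized
   inlinable callee of size 15 scores 1 + 31 = 32, the same callee found
   only speculatively scores 1 + 15 = 16 (integer division of 31 by 2), and
   a target that cannot be inlined contributes just the minimal bonus of 1.  */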
3553 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3555 static int
3556 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3558 int result = 0;
3559 ipa_hints hints = estimates.hints;
3560 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3561 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3563 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3565 if (hints & INLINE_HINT_loop_iterations)
3566 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3568 if (hints & INLINE_HINT_loop_stride)
3569 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3571 return result;
3574 /* If there is a reason to penalize the function described by INFO in the
3575 cloning goodness evaluation, do so. */
3577 static inline sreal
3578 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3579 sreal evaluation)
3581 if (info->node_within_scc && !info->node_is_self_scc)
3582 evaluation = (evaluation
3583 * (100 - opt_for_fn (node->decl,
3584 param_ipa_cp_recursion_penalty))) / 100;
3586 if (info->node_calling_single_call)
3587 evaluation = (evaluation
3588 * (100 - opt_for_fn (node->decl,
3589 param_ipa_cp_single_call_penalty)))
3590 / 100;
3592 return evaluation;
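/* For example, assuming param_ipa_cp_recursion_penalty is 40 and
   param_ipa_cp_single_call_penalty is 15: an evaluation of 1000 for a node
   in a non-trivial SCC becomes 600, and 510 if the node additionally makes
   only a single call.  */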
3595 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3596 and SIZE_COST, the sum of frequencies of incoming edges to the
3597 potential new clone in FREQ_SUM and their total profile count in COUNT_SUM. */
3599 static bool
3600 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3601 sreal freq_sum, profile_count count_sum,
3602 int size_cost)
3604 if (time_benefit == 0
3605 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3606 || node->optimize_for_size_p ())
3607 return false;
3609 gcc_assert (size_cost > 0);
3611 ipa_node_params *info = ipa_node_params_sum->get (node);
3612 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3613 if (count_sum.nonzero_p ())
3615 gcc_assert (base_count.nonzero_p ());
3616 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3617 sreal evaluation = (time_benefit * factor) / size_cost;
3618 evaluation = incorporate_penalties (node, info, evaluation);
3619 evaluation *= 1000;
3621 if (dump_file && (dump_flags & TDF_DETAILS))
3623 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3624 "size: %i, count_sum: ", time_benefit.to_double (),
3625 size_cost);
3626 count_sum.dump (dump_file);
3627 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3628 info->node_within_scc
3629 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3630 info->node_calling_single_call ? ", single_call" : "",
3631 evaluation.to_double (), eval_threshold);
3634 return evaluation.to_int () >= eval_threshold;
3636 else
3638 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3639 evaluation = incorporate_penalties (node, info, evaluation);
3640 evaluation *= 1000;
3642 if (dump_file && (dump_flags & TDF_DETAILS))
3643 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3644 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3645 "threshold: %i\n",
3646 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3647 info->node_within_scc
3648 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3649 info->node_calling_single_call ? ", single_call" : "",
3650 evaluation.to_double (), eval_threshold);
3652 return evaluation.to_int () >= eval_threshold;
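/* A hypothetical instance of the frequency-based branch above: with a time
   benefit of 4.0, a frequency sum of 2.0 and a size cost of 16, the
   evaluation is 4.0 * 2.0 / 16 * 1000 = 500 before penalties, which would
   just reach an assumed param_ipa_cp_eval_threshold of 500.  */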
3656 /* Grow vectors in AVALS and fill them with information about values of
3657 parameters that are known to be independent of the context. Only calculate
3658 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3659 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3660 parameters will be stored in it.
3662 TODO: Also grow context independent value range vectors. */
3664 static bool
3665 gather_context_independent_values (class ipa_node_params *info,
3666 ipa_auto_call_arg_values *avals,
3667 bool calculate_aggs,
3668 int *removable_params_cost)
3670 int i, count = ipa_get_param_count (info);
3671 bool ret = false;
3673 avals->m_known_vals.safe_grow_cleared (count, true);
3674 avals->m_known_contexts.safe_grow_cleared (count, true);
3676 if (removable_params_cost)
3677 *removable_params_cost = 0;
3679 for (i = 0; i < count; i++)
3681 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3682 ipcp_lattice<tree> *lat = &plats->itself;
3684 if (lat->is_single_const ())
3686 ipcp_value<tree> *val = lat->values;
3687 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3688 avals->m_known_vals[i] = val->value;
3689 if (removable_params_cost)
3690 *removable_params_cost
3691 += estimate_move_cost (TREE_TYPE (val->value), false);
3692 ret = true;
3694 else if (removable_params_cost
3695 && !ipa_is_param_used (info, i))
3696 *removable_params_cost
3697 += ipa_get_param_move_cost (info, i);
3699 if (!ipa_is_param_used (info, i))
3700 continue;
3702 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3703 /* Do not account a known context as a reason for cloning. We can see
3704 if it permits devirtualization. */
3705 if (ctxlat->is_single_const ())
3706 avals->m_known_contexts[i] = ctxlat->values->value;
3708 if (calculate_aggs)
3709 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3712 return ret;
3715 /* Perform time and size measurement of NODE with the context given in AVALS,
3716 calculate the benefit compared to the node without specialization and store
3717 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3718 context-independent or unused removable parameters and EST_MOVE_COST, the
3719 estimated movement cost of the considered parameter. */
3721 static void
3722 perform_estimation_of_a_value (cgraph_node *node,
3723 ipa_auto_call_arg_values *avals,
3724 int removable_params_cost, int est_move_cost,
3725 ipcp_value_base *val)
3727 sreal time_benefit;
3728 ipa_call_estimates estimates;
3730 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3732 /* Extern inline functions have no cloning local time benefits because they
3733 will be inlined anyway. The only reason to clone them is if it enables
3734 optimization in any of the functions they call. */
3735 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3736 time_benefit = 0;
3737 else
3738 time_benefit = (estimates.nonspecialized_time - estimates.time)
3739 + (devirtualization_time_bonus (node, avals)
3740 + hint_time_bonus (node, estimates)
3741 + removable_params_cost + est_move_cost);
3743 int size = estimates.size;
3744 gcc_checking_assert (size >= 0);
3745 /* The inliner-heuristics based estimates may think that in certain
3746 contexts some functions do not have any size at all, but we want
3747 all specializations to have at least a tiny cost, not least so that we
3748 do not divide by zero. */
3749 if (size == 0)
3750 size = 1;
3752 val->local_time_benefit = time_benefit;
3753 val->local_size_cost = size;
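/* A worked example with made-up numbers: if specializing NODE for AVALS is
   estimated to shrink its time from 100 to 80 while enabling
   devirtualizations worth a bonus of 10 and removing parameters with a total
   move cost of 2, then local_time_benefit becomes (100 - 80) + 10 + 2 = 32
   (plus any hint bonus), to be weighed against local_size_cost later.  */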
3756 /* Get the overall limit of unit growth based on parameters extracted from
3757 NODE. It does not really make sense to mix functions with different
3758 overall growth limits, but it is possible, and if it happens, we do not
3759 want to select one limit at random. */
3761 static long
3762 get_max_overall_size (cgraph_node *node)
3764 long max_new_size = orig_overall_size;
3765 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3766 if (max_new_size < large_unit)
3767 max_new_size = large_unit;
3768 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3769 max_new_size += max_new_size * unit_growth / 100 + 1;
3770 return max_new_size;
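/* For illustration with example parameter values: if orig_overall_size is
   20000, param_ipa_cp_large_unit_insns is 16000 and param_ipa_cp_unit_growth
   is 10, the base stays at 20000 because it already exceeds the large-unit
   threshold, and the returned limit is 20000 + 20000 * 10 / 100 + 1 = 22001.  */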
3773 /* Return true if NODE should be cloned just for a parameter removal, possibly
3774 dumping a reason if not. */
3776 static bool
3777 clone_for_param_removal_p (cgraph_node *node)
3779 if (!node->can_change_signature)
3781 if (dump_file && (dump_flags & TDF_DETAILS))
3782 fprintf (dump_file, " Not considering cloning to remove parameters, "
3783 "function cannot change signature.\n");
3784 return false;
3786 if (node->can_be_local_p ())
3788 if (dump_file && (dump_flags & TDF_DETAILS))
3789 fprintf (dump_file, " Not considering cloning to remove parameters, "
3790 "IPA-SRA can do it potentially better.\n");
3791 return false;
3793 return true;
3796 /* Iterate over known values of parameters of NODE and estimate the local
3797 effects in terms of time and size they have. */
3799 static void
3800 estimate_local_effects (struct cgraph_node *node)
3802 ipa_node_params *info = ipa_node_params_sum->get (node);
3803 int count = ipa_get_param_count (info);
3804 bool always_const;
3805 int removable_params_cost;
3807 if (!count || !ipcp_versionable_function_p (node))
3808 return;
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3811 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3813 ipa_auto_call_arg_values avals;
3814 always_const = gather_context_independent_values (info, &avals, true,
3815 &removable_params_cost);
3816 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3817 if (always_const || devirt_bonus
3818 || (removable_params_cost && clone_for_param_removal_p (node)))
3820 struct caller_statistics stats;
3821 ipa_call_estimates estimates;
3823 init_caller_stats (&stats);
3824 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3825 false);
3826 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3827 sreal time = estimates.nonspecialized_time - estimates.time;
3828 time += devirt_bonus;
3829 time += hint_time_bonus (node, estimates);
3830 time += removable_params_cost;
3831 int size = estimates.size - stats.n_calls * removable_params_cost;
3833 if (dump_file)
3834 fprintf (dump_file, " - context independent values, size: %i, "
3835 "time_benefit: %f\n", size, (time).to_double ());
3837 if (size <= 0 || node->local)
3839 info->do_clone_for_all_contexts = true;
3841 if (dump_file)
3842 fprintf (dump_file, " Decided to specialize for all "
3843 "known contexts, code not going to grow.\n");
3845 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3846 stats.count_sum, size))
3848 if (size + overall_size <= get_max_overall_size (node))
3850 info->do_clone_for_all_contexts = true;
3851 overall_size += size;
3853 if (dump_file)
3854 fprintf (dump_file, " Decided to specialize for all "
3855 "known contexts, growth (to %li) deemed "
3856 "beneficial.\n", overall_size);
3858 else if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file, " Not cloning for all contexts because "
3860 "maximum unit size would be reached with %li.\n",
3861 size + overall_size);
3863 else if (dump_file && (dump_flags & TDF_DETAILS))
3864 fprintf (dump_file, " Not cloning for all contexts because "
3865 "!good_cloning_opportunity_p.\n");
3869 for (int i = 0; i < count; i++)
3871 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3872 ipcp_lattice<tree> *lat = &plats->itself;
3873 ipcp_value<tree> *val;
3875 if (lat->bottom
3876 || !lat->values
3877 || avals.m_known_vals[i])
3878 continue;
3880 for (val = lat->values; val; val = val->next)
3882 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3883 avals.m_known_vals[i] = val->value;
3885 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3886 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3887 emc, val);
3889 if (dump_file && (dump_flags & TDF_DETAILS))
3891 fprintf (dump_file, " - estimates for value ");
3892 print_ipcp_constant_value (dump_file, val->value);
3893 fprintf (dump_file, " for ");
3894 ipa_dump_param (dump_file, info, i);
3895 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3896 val->local_time_benefit.to_double (),
3897 val->local_size_cost);
3900 avals.m_known_vals[i] = NULL_TREE;
3903 for (int i = 0; i < count; i++)
3905 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3907 if (!plats->virt_call)
3908 continue;
3910 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3911 ipcp_value<ipa_polymorphic_call_context> *val;
3913 if (ctxlat->bottom
3914 || !ctxlat->values
3915 || !avals.m_known_contexts[i].useless_p ())
3916 continue;
3918 for (val = ctxlat->values; val; val = val->next)
3920 avals.m_known_contexts[i] = val->value;
3921 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3922 0, val);
3924 if (dump_file && (dump_flags & TDF_DETAILS))
3926 fprintf (dump_file, " - estimates for polymorphic context ");
3927 print_ipcp_constant_value (dump_file, val->value);
3928 fprintf (dump_file, " for ");
3929 ipa_dump_param (dump_file, info, i);
3930 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3931 val->local_time_benefit.to_double (),
3932 val->local_size_cost);
3935 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3938 unsigned all_ctx_len = avals.m_known_aggs.length ();
3939 auto_vec<ipa_argagg_value, 32> all_ctx;
3940 all_ctx.reserve_exact (all_ctx_len);
3941 all_ctx.splice (avals.m_known_aggs);
3942 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3944 unsigned j = 0;
3945 for (int index = 0; index < count; index++)
3947 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3949 if (plats->aggs_bottom || !plats->aggs)
3950 continue;
3952 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3954 ipcp_value<tree> *val;
3955 if (aglat->bottom || !aglat->values
3956 /* If the following is true, the one value is already part of all
3957 context estimations. */
3958 || (!plats->aggs_contain_variable
3959 && aglat->is_single_const ()))
3960 continue;
3962 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3963 while (j < all_ctx_len
3964 && (all_ctx[j].index < index
3965 || (all_ctx[j].index == index
3966 && all_ctx[j].unit_offset < unit_offset)))
3968 avals.m_known_aggs[j] = all_ctx[j];
3969 j++;
3972 for (unsigned k = j; k < all_ctx_len; k++)
3973 avals.m_known_aggs[k+1] = all_ctx[k];
3975 for (val = aglat->values; val; val = val->next)
3977 avals.m_known_aggs[j].value = val->value;
3978 avals.m_known_aggs[j].unit_offset = unit_offset;
3979 avals.m_known_aggs[j].index = index;
3980 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3981 avals.m_known_aggs[j].killed = false;
3983 perform_estimation_of_a_value (node, &avals,
3984 removable_params_cost, 0, val);
3986 if (dump_file && (dump_flags & TDF_DETAILS))
3988 fprintf (dump_file, " - estimates for value ");
3989 print_ipcp_constant_value (dump_file, val->value);
3990 fprintf (dump_file, " for ");
3991 ipa_dump_param (dump_file, info, index);
3992 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3993 "]: time_benefit: %g, size: %i\n",
3994 plats->aggs_by_ref ? "ref " : "",
3995 aglat->offset,
3996 val->local_time_benefit.to_double (),
3997 val->local_size_cost);
4005 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
4006 topological sort of values. */
4008 template <typename valtype>
4009 void
4010 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
4012 ipcp_value_source<valtype> *src;
4014 if (cur_val->dfs)
4015 return;
4017 dfs_counter++;
4018 cur_val->dfs = dfs_counter;
4019 cur_val->low_link = dfs_counter;
4021 cur_val->topo_next = stack;
4022 stack = cur_val;
4023 cur_val->on_stack = true;
4025 for (src = cur_val->sources; src; src = src->next)
4026 if (src->val)
4028 if (src->val->dfs == 0)
4030 add_val (src->val);
4031 if (src->val->low_link < cur_val->low_link)
4032 cur_val->low_link = src->val->low_link;
4034 else if (src->val->on_stack
4035 && src->val->dfs < cur_val->low_link)
4036 cur_val->low_link = src->val->dfs;
4039 if (cur_val->dfs == cur_val->low_link)
4041 ipcp_value<valtype> *v, *scc_list = NULL;
4045 v = stack;
4046 stack = v->topo_next;
4047 v->on_stack = false;
4048 v->scc_no = cur_val->dfs;
4050 v->scc_next = scc_list;
4051 scc_list = v;
4053 while (v != cur_val);
4055 cur_val->topo_next = values_topo;
4056 values_topo = cur_val;
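/* For illustration (hypothetical values): if value A is a source of value B
   via one jump function and B is in turn a source of A via another, the two
   form a strongly connected component; this instance of Tarjan's SCC
   algorithm gives both the same scc_no, chains them through scc_next, and
   links the SCC into values_topo in topological order.  */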
4060 /* Add all values in lattices associated with NODE to the topological sort if
4061 they are not there yet. */
4063 static void
4064 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
4066 ipa_node_params *info = ipa_node_params_sum->get (node);
4067 int i, count = ipa_get_param_count (info);
4069 for (i = 0; i < count; i++)
4071 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
4072 ipcp_lattice<tree> *lat = &plats->itself;
4073 struct ipcp_agg_lattice *aglat;
4075 if (!lat->bottom)
4077 ipcp_value<tree> *val;
4078 for (val = lat->values; val; val = val->next)
4079 topo->constants.add_val (val);
4082 if (!plats->aggs_bottom)
4083 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4084 if (!aglat->bottom)
4086 ipcp_value<tree> *val;
4087 for (val = aglat->values; val; val = val->next)
4088 topo->constants.add_val (val);
4091 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
4092 if (!ctxlat->bottom)
4094 ipcp_value<ipa_polymorphic_call_context> *ctxval;
4095 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
4096 topo->contexts.add_val (ctxval);
4101 /* One pass of constant propagation along the call graph edges, from callers
4102 to callees (requires topological ordering in TOPO); iterates over strongly
4103 connected components. */
4105 static void
4106 propagate_constants_topo (class ipa_topo_info *topo)
4108 int i;
4110 for (i = topo->nnodes - 1; i >= 0; i--)
4112 unsigned j;
4113 struct cgraph_node *v, *node = topo->order[i];
4114 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
4116 /* First, iteratively propagate within the strongly connected component
4117 until all lattices stabilize. */
4118 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4119 if (v->has_gimple_body_p ())
4121 if (opt_for_fn (v->decl, flag_ipa_cp)
4122 && opt_for_fn (v->decl, optimize))
4123 push_node_to_stack (topo, v);
4124 /* When V is not optimized, we cannot push it to the stack, but we
4125 still need to set all its callees' lattices to bottom. */
4126 else
4128 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4129 propagate_constants_across_call (cs);
4133 v = pop_node_from_stack (topo);
4134 while (v)
4136 struct cgraph_edge *cs;
4137 class ipa_node_params *info = NULL;
4138 bool self_scc = true;
4140 for (cs = v->callees; cs; cs = cs->next_callee)
4141 if (ipa_edge_within_scc (cs))
4143 cgraph_node *callee = cs->callee->function_symbol ();
4145 if (v != callee)
4146 self_scc = false;
4148 if (!info)
4150 info = ipa_node_params_sum->get (v);
4151 info->node_within_scc = true;
4154 if (propagate_constants_across_call (cs))
4155 push_node_to_stack (topo, callee);
4158 if (info)
4159 info->node_is_self_scc = self_scc;
4161 v = pop_node_from_stack (topo);
4164 /* Afterwards, propagate along edges leading out of the SCC, calculate
4165 the local effects of the discovered constants and add all valid values
4166 to their topological sort. */
4167 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4168 if (v->has_gimple_body_p ()
4169 && opt_for_fn (v->decl, flag_ipa_cp)
4170 && opt_for_fn (v->decl, optimize))
4172 struct cgraph_edge *cs;
4174 estimate_local_effects (v);
4175 add_all_node_vals_to_toposort (v, topo);
4176 for (cs = v->callees; cs; cs = cs->next_callee)
4177 if (!ipa_edge_within_scc (cs))
4178 propagate_constants_across_call (cs);
4180 cycle_nodes.release ();
4184 /* Propagate the estimated effects of individual values along the topological
4185 order from the dependent values to those they depend on. */
4187 template <typename valtype>
4188 void
4189 value_topo_info<valtype>::propagate_effects ()
4191 ipcp_value<valtype> *base;
4192 hash_set<ipcp_value<valtype> *> processed_srcvals;
4194 for (base = values_topo; base; base = base->topo_next)
4196 ipcp_value_source<valtype> *src;
4197 ipcp_value<valtype> *val;
4198 sreal time = 0;
4199 HOST_WIDE_INT size = 0;
4201 for (val = base; val; val = val->scc_next)
4203 time = time + val->local_time_benefit + val->prop_time_benefit;
4204 size = size + val->local_size_cost + val->prop_size_cost;
4207 for (val = base; val; val = val->scc_next)
4209 processed_srcvals.empty ();
4210 for (src = val->sources; src; src = src->next)
4211 if (src->val
4212 && src->cs->maybe_hot_p ())
4214 if (!processed_srcvals.add (src->val))
4216 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4217 if (prop_size < INT_MAX)
4218 src->val->prop_size_cost = prop_size;
4219 else
4220 continue;
4223 int special_factor = 1;
4224 if (val->same_scc (src->val))
4225 special_factor
4226 = opt_for_fn (src->cs->caller->decl,
4227 param_ipa_cp_recursive_freq_factor);
4228 else if (val->self_recursion_generated_p ()
4229 && (src->cs->callee->function_symbol ()
4230 == src->cs->caller))
4232 int max_recur_gen_depth
4233 = opt_for_fn (src->cs->caller->decl,
4234 param_ipa_cp_max_recursive_depth);
4235 special_factor = max_recur_gen_depth
4236 - val->self_recursion_generated_level + 1;
4239 src->val->prop_time_benefit
4240 += time * special_factor * src->cs->sreal_frequency ();
4243 if (size < INT_MAX)
4245 val->prop_time_benefit = time;
4246 val->prop_size_cost = size;
4248 else
4250 val->prop_time_benefit = 0;
4251 val->prop_size_cost = 0;
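/* Sketch of the effect with made-up numbers: if a callee value has a local
   time benefit of 10 and is brought about only by a caller value V over a
   single maybe-hot edge with sreal frequency 2 (and no recursion, so
   special_factor is 1), V accumulates a prop_time_benefit of 20, crediting
   the caller specialization with the good it does in the callee.  */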
4257 /* Callback for qsort to sort counts of all edges. */
4259 static int
4260 compare_edge_profile_counts (const void *a, const void *b)
4262 const profile_count *cnt1 = (const profile_count *) a;
4263 const profile_count *cnt2 = (const profile_count *) b;
4265 if (*cnt1 < *cnt2)
4266 return 1;
4267 if (*cnt1 > *cnt2)
4268 return -1;
4269 return 0;
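/* Note that the comparison above is inverted, so qsort with this callback
   orders counts from largest to smallest and the index computed from
   param_ipa_cp_profile_count_base below selects a count at the given
   percentile from the hot end.  */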
4273 /* Propagate constants, polymorphic contexts and their effects from the
4274 summaries interprocedurally. */
4276 static void
4277 ipcp_propagate_stage (class ipa_topo_info *topo)
4279 struct cgraph_node *node;
4281 if (dump_file)
4282 fprintf (dump_file, "\n Propagating constants:\n\n");
4284 base_count = profile_count::uninitialized ();
4286 bool compute_count_base = false;
4287 unsigned base_count_pos_percent = 0;
4288 FOR_EACH_DEFINED_FUNCTION (node)
4290 if (node->has_gimple_body_p ()
4291 && opt_for_fn (node->decl, flag_ipa_cp)
4292 && opt_for_fn (node->decl, optimize))
4294 ipa_node_params *info = ipa_node_params_sum->get (node);
4295 determine_versionability (node, info);
4297 unsigned nlattices = ipa_get_param_count (info);
4298 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4299 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4300 initialize_node_lattices (node);
4302 ipa_size_summary *s = ipa_size_summaries->get (node);
4303 if (node->definition && !node->alias && s != NULL)
4304 overall_size += s->self_size;
4305 if (node->count.ipa ().initialized_p ())
4307 compute_count_base = true;
4308 unsigned pos_percent = opt_for_fn (node->decl,
4309 param_ipa_cp_profile_count_base);
4310 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4314 if (compute_count_base)
4316 auto_vec<profile_count> all_edge_counts;
4317 all_edge_counts.reserve_exact (symtab->edges_count);
4318 FOR_EACH_DEFINED_FUNCTION (node)
4319 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4321 profile_count count = cs->count.ipa ();
4322 if (!count.nonzero_p ())
4323 continue;
4325 enum availability avail;
4326 cgraph_node *tgt
4327 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4328 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4329 if (info && info->versionable)
4330 all_edge_counts.quick_push (count);
4333 if (!all_edge_counts.is_empty ())
4335 gcc_assert (base_count_pos_percent <= 100);
4336 all_edge_counts.qsort (compare_edge_profile_counts);
4338 unsigned base_count_pos
4339 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4340 base_count = all_edge_counts[base_count_pos];
4342 if (dump_file)
4344 fprintf (dump_file, "\nSelected base_count from %u edges at "
4345 "position %u, arriving at: ", all_edge_counts.length (),
4346 base_count_pos);
4347 base_count.dump (dump_file);
4348 fprintf (dump_file, "\n");
4351 else if (dump_file)
4352 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4353 "continuing as if without profile feedback.\n");
4356 orig_overall_size = overall_size;
4358 if (dump_file)
4359 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4361 propagate_constants_topo (topo);
4362 if (flag_checking)
4363 ipcp_verify_propagated_values ();
4364 topo->constants.propagate_effects ();
4365 topo->contexts.propagate_effects ();
4367 if (dump_file)
4369 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4370 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4374 /* Discover indirect outgoing edges of NODE, a new clone with known constants
4375 KNOWN_CSTS, whose targets have now become known, and make them direct. */
4377 static void
4378 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4379 vec<tree> known_csts,
4380 vec<ipa_polymorphic_call_context>
4381 known_contexts,
4382 vec<ipa_argagg_value, va_gc> *aggvals)
4384 struct cgraph_edge *ie, *next_ie;
4385 bool found = false;
4387 for (ie = node->indirect_calls; ie; ie = next_ie)
4389 tree target;
4390 bool speculative;
4392 next_ie = ie->next_callee;
4393 ipa_argagg_value_list avs (aggvals);
4394 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4395 avs, &speculative);
4396 if (target)
4398 bool agg_contents = ie->indirect_info->agg_contents;
4399 bool polymorphic = ie->indirect_info->polymorphic;
4400 int param_index = ie->indirect_info->param_index;
4401 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4402 speculative);
4403 found = true;
4405 if (cs && !agg_contents && !polymorphic)
4407 ipa_node_params *info = ipa_node_params_sum->get (node);
4408 int c = ipa_get_controlled_uses (info, param_index);
4409 if (c != IPA_UNDESCRIBED_USE
4410 && !ipa_get_param_load_dereferenced (info, param_index))
4412 struct ipa_ref *to_del;
4414 c--;
4415 ipa_set_controlled_uses (info, param_index, c);
4416 if (dump_file && (dump_flags & TDF_DETAILS))
4417 fprintf (dump_file, " controlled uses count of param "
4418 "%i bumped down to %i\n", param_index, c);
4419 if (c == 0
4420 && (to_del = node->find_reference (cs->callee, NULL, 0,
4421 IPA_REF_ADDR)))
4423 if (dump_file && (dump_flags & TDF_DETAILS))
4424 fprintf (dump_file, " and even removing its "
4425 "cloning-created reference\n");
4426 to_del->remove_reference ();
4432 /* Turning indirect calls into direct ones will improve the overall summary. */
4433 if (found)
4434 ipa_update_overall_fn_summary (node);
4437 class edge_clone_summary;
4438 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4440 /* Edge clone summary. */
4442 class edge_clone_summary
4444 public:
4445 /* Default constructor. */
4446 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4448 /* Default destructor. */
4449 ~edge_clone_summary ()
4451 if (prev_clone)
4452 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4453 if (next_clone)
4454 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4457 cgraph_edge *prev_clone;
4458 cgraph_edge *next_clone;
4461 class edge_clone_summary_t:
4462 public call_summary <edge_clone_summary *>
4464 public:
4465 edge_clone_summary_t (symbol_table *symtab):
4466 call_summary <edge_clone_summary *> (symtab)
4468 m_initialize_when_cloning = true;
4471 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4472 edge_clone_summary *src_data,
4473 edge_clone_summary *dst_data) final override;
4476 /* Edge duplication hook. */
4478 void
4479 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4480 edge_clone_summary *src_data,
4481 edge_clone_summary *dst_data)
4483 if (src_data->next_clone)
4484 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4485 dst_data->prev_clone = src_edge;
4486 dst_data->next_clone = src_data->next_clone;
4487 src_data->next_clone = dst_edge;
4490 /* Return true if CS calls DEST or its clone for all contexts. When
4491 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4492 edges from/to an all-context clone. */
4494 static bool
4495 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4496 bool allow_recursion_to_clone)
4498 enum availability availability;
4499 cgraph_node *callee = cs->callee->function_symbol (&availability);
4501 if (availability <= AVAIL_INTERPOSABLE)
4502 return false;
4503 if (callee == dest)
4504 return true;
4505 if (!allow_recursion_to_clone && cs->caller == callee)
4506 return false;
4508 ipa_node_params *info = ipa_node_params_sum->get (callee);
4509 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4512 /* Return true if edge CS does bring about the value described by SRC to
4513 DEST_VAL of node DEST or its clone for all contexts. */
4515 static bool
4516 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4517 cgraph_node *dest, ipcp_value<tree> *dest_val)
4519 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4521 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4522 || caller_info->node_dead)
4523 return false;
4525 if (!src->val)
4526 return true;
4528 if (caller_info->ipcp_orig_node)
4530 tree t = NULL_TREE;
4531 if (src->offset == -1)
4532 t = caller_info->known_csts[src->index];
4533 else if (ipcp_transformation *ts
4534 = ipcp_get_transformation_summary (cs->caller))
4536 ipa_argagg_value_list avl (ts);
4537 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4539 return (t != NULL_TREE
4540 && values_equal_for_ipcp_p (src->val->value, t));
4542 else
4544 if (src->val == dest_val)
4545 return true;
4547 struct ipcp_agg_lattice *aglat;
4548 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4549 src->index);
4550 if (src->offset == -1)
4551 return (plats->itself.is_single_const ()
4552 && values_equal_for_ipcp_p (src->val->value,
4553 plats->itself.values->value));
4554 else
4556 if (plats->aggs_bottom || plats->aggs_contain_variable)
4557 return false;
4558 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4559 if (aglat->offset == src->offset)
4560 return (aglat->is_single_const ()
4561 && values_equal_for_ipcp_p (src->val->value,
4562 aglat->values->value));
4564 return false;
4568 /* Return true if edge CS does bring about the value described by SRC to
4569 DST_VAL of node DEST or its clone for all contexts. */
4571 static bool
4572 cgraph_edge_brings_value_p (cgraph_edge *cs,
4573 ipcp_value_source<ipa_polymorphic_call_context> *src,
4574 cgraph_node *dest,
4575 ipcp_value<ipa_polymorphic_call_context> *)
4577 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4579 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4580 || caller_info->node_dead)
4581 return false;
4582 if (!src->val)
4583 return true;
4585 if (caller_info->ipcp_orig_node)
4586 return (caller_info->known_contexts.length () > (unsigned) src->index)
4587 && values_equal_for_ipcp_p (src->val->value,
4588 caller_info->known_contexts[src->index]);
4590 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4591 src->index);
4592 return plats->ctxlat.is_single_const ()
4593 && values_equal_for_ipcp_p (src->val->value,
4594 plats->ctxlat.values->value);
4597 /* Get the next clone in the linked list of clones of an edge. */
4599 static inline struct cgraph_edge *
4600 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4602 edge_clone_summary *s = edge_clone_summaries->get (cs);
4603 return s != NULL ? s->next_clone : NULL;
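/* Together with edge_clone_summaries this makes it possible to walk all
   clones of an original edge, a pattern used several times below:

     for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
       if (cgraph_edge_brings_value_p (e, src, dest, val))
	 ...

   where the loop body stands for whatever needs to be done per clone.  */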
4606 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4607 of them is viable and hot, return true. In that case, for those that still
4608 hold, add their edge frequencies and their number, and the cumulative profile
4609 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4610 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4612 template <typename valtype>
4613 static bool
4614 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4615 sreal *freq_sum, int *caller_count,
4616 profile_count *rec_count_sum,
4617 profile_count *nonrec_count_sum)
4619 ipcp_value_source<valtype> *src;
4620 sreal freq = 0;
4621 int count = 0;
4622 profile_count rec_cnt = profile_count::zero ();
4623 profile_count nonrec_cnt = profile_count::zero ();
4624 bool hot = false;
4625 bool non_self_recursive = false;
4627 for (src = val->sources; src; src = src->next)
4629 struct cgraph_edge *cs = src->cs;
4630 while (cs)
4632 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4634 count++;
4635 freq += cs->sreal_frequency ();
4636 hot |= cs->maybe_hot_p ();
4637 if (cs->caller != dest)
4639 non_self_recursive = true;
4640 if (cs->count.ipa ().initialized_p ())
4641 rec_cnt += cs->count.ipa ();
4643 else if (cs->count.ipa ().initialized_p ())
4644 nonrec_cnt += cs->count.ipa ();
4646 cs = get_next_cgraph_edge_clone (cs);
4650 /* If the only edges bringing a value are self-recursive ones, do not bother
4651 evaluating it. */
4652 if (!non_self_recursive)
4653 return false;
4655 *freq_sum = freq;
4656 *caller_count = count;
4657 *rec_count_sum = rec_cnt;
4658 *nonrec_count_sum = nonrec_cnt;
4660 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4662 struct cgraph_edge *cs;
4664 /* A cold non-SCC source edge could still trigger hot recursive execution
4665 of the function. Consider the case hot and rely on the following cost
4666 model computation to select the right candidates. */
4667 for (cs = dest->callers; cs; cs = cs->next_caller)
4668 if (cs->caller == dest && cs->maybe_hot_p ())
4669 return true;
4672 return hot;
4675 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4676 to let a non-self-recursive caller be the first element. Thus, we can
4677 simplify intersecting operations on values that arrive from all of these
4678 callers, especially when a self-recursive call exists. Return true if
4679 this kind of adjustment is possible. */
4681 static bool
4682 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4683 cgraph_node *node)
4685 for (unsigned i = 0; i < callers.length (); i++)
4687 cgraph_edge *cs = callers[i];
4689 if (cs->caller != node)
4691 if (i > 0)
4693 callers[i] = callers[0];
4694 callers[0] = cs;
4696 return true;
4699 return false;
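/* For example (hypothetical caller vector): given callers {rec1, rec2, other}
   of NODE where rec1 and rec2 are self-recursive edges, the function swaps
   "other" to the front and returns true, so that value intersection starts
   from a caller that does not depend on the clone being created.  */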
4702 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4703 is assumed their number is known and equal to CALLER_COUNT. */
4705 template <typename valtype>
4706 static vec<cgraph_edge *>
4707 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4708 int caller_count)
4710 ipcp_value_source<valtype> *src;
4711 vec<cgraph_edge *> ret;
4713 ret.create (caller_count);
4714 for (src = val->sources; src; src = src->next)
4716 struct cgraph_edge *cs = src->cs;
4717 while (cs)
4719 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4720 ret.quick_push (cs);
4721 cs = get_next_cgraph_edge_clone (cs);
4725 if (caller_count > 1)
4726 adjust_callers_for_value_intersection (ret, dest);
4728 return ret;
4731 /* Construct a replacement map for a known constant VALUE of the formal
4732 parameter with index PARM_NUM. Return it or NULL if for some reason it
4733 cannot be created. FORCE_LOAD_REF should be set to true when the reference
4734 created for the constant should be a load one and not an address one because
4735 the corresponding parameter p is only used as *p. */
4737 static struct ipa_replace_map *
4738 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4739 bool force_load_ref)
4741 struct ipa_replace_map *replace_map;
4743 replace_map = ggc_alloc<ipa_replace_map> ();
4744 if (dump_file)
4746 fprintf (dump_file, " replacing ");
4747 ipa_dump_param (dump_file, info, parm_num);
4749 fprintf (dump_file, " with const ");
4750 print_generic_expr (dump_file, value);
4752 if (force_load_ref)
4753 fprintf (dump_file, " - forcing load reference\n");
4754 else
4755 fprintf (dump_file, "\n");
4757 replace_map->parm_num = parm_num;
4758 replace_map->new_tree = value;
4759 replace_map->force_load_ref = force_load_ref;
4760 return replace_map;
4763 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4764 one, otherwise it will be referred to as the original node. */
4766 static void
4767 dump_profile_updates (cgraph_node *node, bool spec)
4769 if (spec)
4770 fprintf (dump_file, " setting count of the specialized node %s to ",
4771 node->dump_name ());
4772 else
4773 fprintf (dump_file, " setting count of the original node %s to ",
4774 node->dump_name ());
4776 node->count.dump (dump_file);
4777 fprintf (dump_file, "\n");
4778 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4780 fprintf (dump_file, " edge to %s has count ",
4781 cs->callee->dump_name ());
4782 cs->count.dump (dump_file);
4783 fprintf (dump_file, "\n");
4787 /* With a partial training run we do not want to assume that the original's
4788 count is zero whenever we redirect all executed edges to a clone. Simply
4789 drop the profile to a local one in this case. In any case, return the new
4790 value. ORIG_NODE is the original node and its count has not been updated yet. */
4792 profile_count
4793 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4795 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4796 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4797 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4798 remainder = remainder.guessed_local ();
4800 return remainder;
4803 /* Structure to sum counts coming from nodes other than the original node and
4804 its clones. */
4806 struct gather_other_count_struct
4808 cgraph_node *orig;
4809 profile_count other_count;
4812 /* Worker callback of call_for_symbol_thunks_and_aliases summing up the
4813 counts that come from non-self-recursive calls. */
4815 static bool
4816 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4818 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4819 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4820 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4821 desc->other_count += cs->count.ipa ();
4822 return false;
4825 /* Structure to help analyze if we need to boost counts of some clones of some
4826 non-recursive edges to match the new callee count. */
4828 struct desc_incoming_count_struct
4830 cgraph_node *orig;
4831 hash_set <cgraph_edge *> *processed_edges;
4832 profile_count count;
4833 unsigned unproc_orig_rec_edges;
4836 /* Go over edges calling NODE and its thunks and gather information about
4837 incoming counts so that we know if we need to make any adjustments. */
4839 static void
4840 analyze_clone_incoming_counts (cgraph_node *node,
4841 desc_incoming_count_struct *desc)
4843 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4844 if (cs->caller->thunk)
4846 analyze_clone_incoming_counts (cs->caller, desc);
4847 continue;
4849 else
4851 if (cs->count.initialized_p ())
4852 desc->count += cs->count.ipa ();
4853 if (!desc->processed_edges->contains (cs)
4854 && cs->caller->clone_of == desc->orig)
4855 desc->unproc_orig_rec_edges++;
4859 /* If caller edge counts of a clone created for a self-recursive arithmetic
4860 jump function must be adjusted because they come from the "seed" clone
4861 for the first value and so have been excessively scaled back as if they
4862 were not recursive calls, adjust them so that the incoming counts of NODE
4863 match its count. NODE is the node or its thunk. */
4865 static void
4866 adjust_clone_incoming_counts (cgraph_node *node,
4867 desc_incoming_count_struct *desc)
4869 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4870 if (cs->caller->thunk)
4872 adjust_clone_incoming_counts (cs->caller, desc);
4873 profile_count sum = profile_count::zero ();
4874 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4875 if (e->count.initialized_p ())
4876 sum += e->count.ipa ();
4877 cs->count = cs->count.combine_with_ipa_count (sum);
4879 else if (!desc->processed_edges->contains (cs)
4880 && cs->caller->clone_of == desc->orig)
4882 cs->count += desc->count;
4883 if (dump_file)
4885 fprintf (dump_file, " Adjusted count of an incoming edge of "
4886 "a clone %s -> %s to ", cs->caller->dump_name (),
4887 cs->callee->dump_name ());
4888 cs->count.dump (dump_file);
4889 fprintf (dump_file, "\n");
4894 /* When ORIG_NODE has been cloned for values which have been generated for a
4895 self-recursive call as a result of arithmetic pass-through
4896 jump functions, adjust its count together with counts of all such clones in
4897 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4899 The function sums the counts of the original node and all its clones that
4900 cannot be attributed to a specific clone because it comes from a
4901 non-recursive edge. This sum is then evenly divided between the clones and
4902 on top of that each one gets all the counts which can be attributed directly
4903 to it. */
4905 static void
4906 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4907 const vec<cgraph_node *> &self_gen_clones)
4909 profile_count redist_sum = orig_node->count.ipa ();
4910 if (!(redist_sum > profile_count::zero ()))
4911 return;
4913 if (dump_file)
4914 fprintf (dump_file, " Updating profile of self recursive clone "
4915 "series\n");
4917 gather_other_count_struct gocs;
4918 gocs.orig = orig_node;
4919 gocs.other_count = profile_count::zero ();
4921 auto_vec <profile_count, 8> other_edges_count;
4922 for (cgraph_node *n : self_gen_clones)
4924 gocs.other_count = profile_count::zero ();
4925 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4926 &gocs, false);
4927 other_edges_count.safe_push (gocs.other_count);
4928 redist_sum -= gocs.other_count;
4931 hash_set<cgraph_edge *> processed_edges;
4932 unsigned i = 0;
4933 for (cgraph_node *n : self_gen_clones)
4935 profile_count orig_count = n->count;
4936 profile_count new_count
4937 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4938 new_count = lenient_count_portion_handling (new_count, orig_node);
4939 n->count = new_count;
4940 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4941 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4943 cs->count = cs->count.apply_scale (new_count, orig_count);
4944 processed_edges.add (cs);
4946 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4947 cs->count = cs->count.apply_scale (new_count, orig_count);
4949 i++;
4952 /* There are still going to be edges to ORIG_NODE that have one or more
4953 clones coming from another node clone in SELF_GEN_CLONES and which we
4954 scaled by the same amount, which means that the total incoming sum of
4955 counts to ORIG_NODE will be too high; scale such edges back. */
4956 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4958 if (cs->callee->ultimate_alias_target () == orig_node)
4960 unsigned den = 0;
4961 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4962 if (e->callee->ultimate_alias_target () == orig_node
4963 && processed_edges.contains (e))
4964 den++;
4965 if (den > 0)
4966 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4967 if (e->callee->ultimate_alias_target () == orig_node
4968 && processed_edges.contains (e))
4969 e->count /= den;
4973 /* Edges from the seeds of the values generated for arithmetic jump functions
4974 along self-recursive edges are likely to have fairly low counts, and so,
4975 since edges from them to nodes in the self_gen_clones do not correspond to
4976 the artificially distributed count of the nodes, the total sum of incoming
4977 counts of some clones might be too low. Detect this situation and correct
4978 it. */
4979 for (cgraph_node *n : self_gen_clones)
4981 if (!(n->count.ipa () > profile_count::zero ()))
4982 continue;
4984 desc_incoming_count_struct desc;
4985 desc.orig = orig_node;
4986 desc.processed_edges = &processed_edges;
4987 desc.count = profile_count::zero ();
4988 desc.unproc_orig_rec_edges = 0;
4989 analyze_clone_incoming_counts (n, &desc);
4991 if (n->count.differs_from_p (desc.count))
4993 if (n->count > desc.count
4994 && desc.unproc_orig_rec_edges > 0)
4996 desc.count = n->count - desc.count;
4997 desc.count /= desc.unproc_orig_rec_edges;
4998 adjust_clone_incoming_counts (n, &desc);
5000 else if (dump_file)
5001 fprintf (dump_file,
5002 " Unable to fix up incoming counts for %s.\n",
5003 n->dump_name ());
5007 if (dump_file)
5008 for (cgraph_node *n : self_gen_clones)
5009 dump_profile_updates (n, n != orig_node);
5010 return;
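/* A sketch with made-up counts: if ORIG_NODE had an IPA count of 900,
   SELF_GEN_CLONES contains it and two clones, and no counts are attributable
   to any of them through non-recursive edges, each of the three nodes
   receives a count of 300, prior to the fix-ups of incoming edge counts that
   the function performs afterwards.  */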
5013 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
5014 their profile information to reflect this. This function should not be used
5015 for clones generated for arithmetic pass-through jump functions on a
5016 self-recursive call graph edge, that situation is handled by
5017 update_counts_for_self_gen_clones. */
5019 static void
5020 update_profiling_info (struct cgraph_node *orig_node,
5021 struct cgraph_node *new_node)
5023 struct caller_statistics stats;
5024 profile_count new_sum;
5025 profile_count remainder, orig_node_count = orig_node->count.ipa ();
5027 if (!(orig_node_count > profile_count::zero ()))
5028 return;
5030 if (dump_file)
5032 fprintf (dump_file, " Updating profile from original count: ");
5033 orig_node_count.dump (dump_file);
5034 fprintf (dump_file, "\n");
5037 init_caller_stats (&stats, new_node);
5038 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
5039 false);
5040 new_sum = stats.count_sum;
5042 bool orig_edges_processed = false;
5043 if (new_sum > orig_node_count)
5045 /* TODO: Profile has already gone astray, keep what we have but lower it
5046 to the global0 category. */
5047 remainder = orig_node->count.global0 ();
5049 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5050 cs->count = cs->count.global0 ();
5051 for (cgraph_edge *cs = orig_node->indirect_calls;
5053 cs = cs->next_callee)
5054 cs->count = cs->count.global0 ();
5055 orig_edges_processed = true;
5057 else if (stats.rec_count_sum.nonzero_p ())
5059 int new_nonrec_calls = stats.n_nonrec_calls;
5060 /* There are self-recursive edges which are likely to bring in the
5061 majority of calls but which we must divide between the original and the
5062 new node. */
5063 init_caller_stats (&stats, orig_node);
5064 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
5065 &stats, false);
5066 int orig_nonrec_calls = stats.n_nonrec_calls;
5067 profile_count orig_nonrec_call_count = stats.count_sum;
5069 if (orig_node->local)
5071 if (!orig_nonrec_call_count.nonzero_p ())
5073 if (dump_file)
5074 fprintf (dump_file, " The original is local and the only "
5075 "incoming edges from non-dead callers with nonzero "
5076 "counts are self-recursive, assuming it is cold.\n");
5077 /* The NEW_NODE count and counts of all its outgoing edges
5078 are still unmodified copies of ORIG_NODE's. Just clear
5079 the latter and bail out. */
5080 profile_count zero;
5081 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5082 zero = profile_count::zero ().guessed_local ();
5083 else
5084 zero = profile_count::adjusted_zero ();
5085 orig_node->count = zero;
5086 for (cgraph_edge *cs = orig_node->callees;
5088 cs = cs->next_callee)
5089 cs->count = zero;
5090 for (cgraph_edge *cs = orig_node->indirect_calls;
5092 cs = cs->next_callee)
5093 cs->count = zero;
5094 return;
5097 else
5099 /* Let's behave as if there was another caller that accounts for all
5100 the calls that were either indirect or from other compilation
5101 units. */
5102 orig_nonrec_calls++;
5103 profile_count pretend_caller_count
5104 = (orig_node_count - new_sum - orig_nonrec_call_count
5105 - stats.rec_count_sum);
5106 orig_nonrec_call_count += pretend_caller_count;
5109 /* Divide all "unexplained" counts roughly proportionally to sums of
5110 counts of non-recursive calls.
5112 We put rather arbitrary limits on how many counts we claim because the
5113 number of non-self-recursive incoming count is only a rough guideline
5114 and there are cases (such as mcf) where using it blindly just takes
5115 too many. And if lattices are considered in the opposite order we
5116 could also take too few. */
5117 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5119 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5120 profile_count new_part
5121 = MAX (MIN (unexp.apply_scale (new_sum,
5122 new_sum + orig_nonrec_call_count),
5123 unexp.apply_scale (limit_den - 1, limit_den)),
5124 unexp.apply_scale (new_nonrec_calls, limit_den));
5125 if (dump_file)
5127 fprintf (dump_file, " Claiming ");
5128 new_part.dump (dump_file);
5129 fprintf (dump_file, " of unexplained ");
5130 unexp.dump (dump_file);
5131 fprintf (dump_file, " counts because of self-recursive "
5132 "calls\n");
5134 new_sum += new_part;
5135 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5136 orig_node);
5138 else
5139 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5140 orig_node);
5142 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5143 new_node->count = new_sum;
5144 orig_node->count = remainder;
5146 profile_count orig_new_node_count = orig_node_count;
5147 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5148 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5149 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5150 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5151 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5153 if (!orig_edges_processed)
5155 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5156 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5157 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5158 for (cgraph_edge *cs = orig_node->indirect_calls;
5160 cs = cs->next_callee)
5161 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5164 if (dump_file)
5166 dump_profile_updates (new_node, true);
5167 dump_profile_updates (orig_node, false);
5171 /* Update the respective profile of specialized NEW_NODE and the original
5172 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5173 have been redirected to the specialized version. */
5175 static void
5176 update_specialized_profile (struct cgraph_node *new_node,
5177 struct cgraph_node *orig_node,
5178 profile_count redirected_sum)
5180 struct cgraph_edge *cs;
5181 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
5183 if (dump_file)
5185 fprintf (dump_file, " the sum of counts of redirected edges is ");
5186 redirected_sum.dump (dump_file);
5187 fprintf (dump_file, "\n old ipa count of the original node is ");
5188 orig_node_count.dump (dump_file);
5189 fprintf (dump_file, "\n");
5191 if (!(orig_node_count > profile_count::zero ()))
5192 return;
5194 new_node_count = new_node->count;
5195 new_node->count += redirected_sum;
5196 orig_node->count
5197 = lenient_count_portion_handling (orig_node->count - redirected_sum,
5198 orig_node);
5200 for (cs = new_node->callees; cs; cs = cs->next_callee)
5201 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5203 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5205 profile_count dec = cs->count.apply_scale (redirected_sum,
5206 orig_node_count);
5207 cs->count -= dec;
5210 if (dump_file)
5212 dump_profile_updates (new_node, true);
5213 dump_profile_updates (orig_node, false);
5217 static void adjust_references_in_caller (cgraph_edge *cs,
5218 symtab_node *symbol, int index);
5220 /* Simple structure to pass a symbol and index (with same meaning as parameters
5221 of adjust_references_in_caller) through a void* parameter of a
5222 call_for_symbol_thunks_and_aliases callback. */
5223 struct symbol_and_index_together
5225 symtab_node *symbol;
5226 int index;
5229 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5230 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5231 static bool
5232 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5234 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5235 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5236 if (!cs->caller->thunk)
5237 adjust_references_in_caller (cs, pack->symbol, pack->index);
5238 return false;
5241 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5242 variable which is only dereferenced and which is represented by SYMBOL. See
5243 if we can remove the ADDR reference in callers associated with the call. */
5245 static void
5246 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5248 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5249 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5250 if (jfunc->type == IPA_JF_CONST)
5252 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5253 cs->lto_stmt_uid,
5254 IPA_REF_ADDR);
5255 if (!to_del)
5256 return;
5257 to_del->remove_reference ();
5258 ipa_zap_jf_refdesc (jfunc);
5259 if (dump_file)
5260 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5261 cs->caller->dump_name (), symbol->dump_name ());
5262 return;
5265 if (jfunc->type != IPA_JF_PASS_THROUGH
5266 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
5267 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
5268 return;
5270 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5271 cgraph_node *caller = cs->caller;
5272 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5273 /* TODO: This consistency check may be too big and not really
5274 that useful. Consider removing it. */
5275 tree cst;
5276 if (caller_info->ipcp_orig_node)
5277 cst = caller_info->known_csts[fidx];
5278 else
5280 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5281 gcc_assert (lat->is_single_const ());
5282 cst = lat->values->value;
5284 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5285 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5286 == symbol));
5288 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5289 if (cuses == IPA_UNDESCRIBED_USE)
5290 return;
5291 gcc_assert (cuses > 0);
5292 cuses--;
5293 ipa_set_controlled_uses (caller_info, fidx, cuses);
5294 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5295 if (dump_file && (dump_flags & TDF_DETAILS))
5296 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5297 "to %i.\n", fidx, caller->dump_name (), cuses);
5298 if (cuses)
5299 return;
5301 if (caller_info->ipcp_orig_node)
5303 /* Cloning machinery has created a reference here, we need to either
5304 remove it or change it to a read one. */
5305 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5306 if (to_del)
5308 to_del->remove_reference ();
5309 if (dump_file)
5310 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5311 cs->caller->dump_name (), symbol->dump_name ());
5312 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5314 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5315 if (dump_file)
5316 fprintf (dump_file,
5317 " ...and replaced it with LOAD one.\n");
5322 symbol_and_index_together pack;
5323 pack.symbol = symbol;
5324 pack.index = fidx;
5325 if (caller->can_change_signature)
5326 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5327 &pack, true);
5331 /* Return true if we would like to remove a parameter from NODE when cloning it
5332 with KNOWN_CSTS scalar constants. */
5334 static bool
5335 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5337 auto_vec<bool, 16> surviving;
5338 bool filled_vec = false;
5339 ipa_node_params *info = ipa_node_params_sum->get (node);
5340 int i, count = ipa_get_param_count (info);
5342 for (i = 0; i < count; i++)
5344 if (!known_csts[i] && ipa_is_param_used (info, i))
5345 continue;
5347 if (!filled_vec)
5349 clone_info *info = clone_info::get (node);
5350 if (!info || !info->param_adjustments)
5351 return true;
5352 info->param_adjustments->get_surviving_params (&surviving);
5353 filled_vec = true;
5355 if (surviving.length () > (unsigned) i && surviving[i])
5356 return true;
5358 return false;
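/* For illustration (hypothetical situation): when considering cloning
   foo (a, b) with KNOWN_CSTS = {4, NULL_TREE} and no previous parameter
   adjustments recorded for NODE, the first parameter is substitutable by a
   constant and there is no record of it having been removed already, so the
   function returns true.  */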
5361 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5362 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5363 redirect all edges in CALLERS to it. */
5365 static struct cgraph_node *
5366 create_specialized_node (struct cgraph_node *node,
5367 vec<tree> known_csts,
5368 vec<ipa_polymorphic_call_context> known_contexts,
5369 vec<ipa_argagg_value, va_gc> *aggvals,
5370 vec<cgraph_edge *> &callers)
5372 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5373 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5374 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5375 struct cgraph_node *new_node;
5376 int i, count = ipa_get_param_count (info);
5377 clone_info *cinfo = clone_info::get (node);
5378 ipa_param_adjustments *old_adjustments = cinfo
5379 ? cinfo->param_adjustments : NULL;
5380 ipa_param_adjustments *new_adjustments;
5381 gcc_assert (!info->ipcp_orig_node);
5382 gcc_assert (node->can_change_signature
5383 || !old_adjustments);
5385 if (old_adjustments)
5387 /* At the moment all IPA optimizations should use the number of
5388 parameters of the prevailing decl as the m_always_copy_start.
5389 Handling any other value would complicate the code below, so for the
5390 time being let's only assert it is so. */
5391 gcc_assert (old_adjustments->m_always_copy_start == count
5392 || old_adjustments->m_always_copy_start < 0);
5393 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5394 for (i = 0; i < old_adj_count; i++)
5396 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5397 if (!node->can_change_signature
5398 || old_adj->op != IPA_PARAM_OP_COPY
5399 || (!known_csts[old_adj->base_index]
5400 && ipa_is_param_used (info, old_adj->base_index)))
5402 ipa_adjusted_param new_adj = *old_adj;
5404 new_adj.prev_clone_adjustment = true;
5405 new_adj.prev_clone_index = i;
5406 vec_safe_push (new_params, new_adj);
5409 bool skip_return = old_adjustments->m_skip_return;
5410 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5411 ipa_param_adjustments (new_params, count,
5412 skip_return));
5414 else if (node->can_change_signature
5415 && want_remove_some_param_p (node, known_csts))
5417 ipa_adjusted_param adj;
5418 memset (&adj, 0, sizeof (adj));
5419 adj.op = IPA_PARAM_OP_COPY;
5420 for (i = 0; i < count; i++)
5421 if (!known_csts[i] && ipa_is_param_used (info, i))
5423 adj.base_index = i;
5424 adj.prev_clone_index = i;
5425 vec_safe_push (new_params, adj);
5427 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5428 ipa_param_adjustments (new_params, count, false));
5430 else
5431 new_adjustments = NULL;
5433 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5434 for (i = callers.length () - 1; i >= 0; i--)
5436 cgraph_edge *cs = callers[i];
5437 if (cs->caller == node)
5439 self_recursive_calls.safe_push (cs);
5440 callers.unordered_remove (i);
5443 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5444 for (i = 0; i < count; i++)
5446 tree t = known_csts[i];
5447 if (!t)
5448 continue;
5450 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5452 bool load_ref = false;
5453 symtab_node *ref_symbol;
5454 if (TREE_CODE (t) == ADDR_EXPR)
5456 tree base = get_base_address (TREE_OPERAND (t, 0));
5457 if (TREE_CODE (base) == VAR_DECL
5458 && ipa_get_controlled_uses (info, i) == 0
5459 && ipa_get_param_load_dereferenced (info, i)
5460 && (ref_symbol = symtab_node::get (base)))
5462 load_ref = true;
5463 if (node->can_change_signature)
5464 for (cgraph_edge *caller : callers)
5465 adjust_references_in_caller (caller, ref_symbol, i);
5469 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5470 if (replace_map)
5471 vec_safe_push (replace_trees, replace_map);
5474 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5475 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5476 node->decl)));
5477 new_node = node->create_virtual_clone (callers, replace_trees,
5478 new_adjustments, "constprop",
5479 suffix_counter);
5480 suffix_counter++;
5482 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5483 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5485 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5486 /* Cloned edges can disappear during cloning as speculation can be
5487 resolved, check that we have one and that it comes from the last
5488 cloning. */
5489 if (cs && cs->caller == new_node)
5490 cs->redirect_callee_duplicating_thunks (new_node);
5491 /* Any future code that would make more than one clone of an outgoing
5492 edge would confuse this mechanism, so let's check that it does not
5493 happen. */
5494 gcc_checking_assert (!cs
5495 || !get_next_cgraph_edge_clone (cs)
5496 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5498 if (have_self_recursive_calls)
5499 new_node->expand_all_artificial_thunks ();
5501 ipa_set_node_agg_value_chain (new_node, aggvals);
5502 for (const ipa_argagg_value &av : aggvals)
5503 new_node->maybe_create_reference (av.value, NULL);
5505 if (dump_file && (dump_flags & TDF_DETAILS))
5507 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5508 if (known_contexts.exists ())
5510 for (i = 0; i < count; i++)
5511 if (!known_contexts[i].useless_p ())
5513 fprintf (dump_file, " known ctx %i is ", i);
5514 known_contexts[i].dump (dump_file);
5517 if (aggvals)
5519 fprintf (dump_file, " Aggregate replacements:");
5520 ipa_argagg_value_list avs (aggvals);
5521 avs.dump (dump_file);
5525 new_info = ipa_node_params_sum->get (new_node);
5526 new_info->ipcp_orig_node = node;
5527 new_node->ipcp_clone = true;
5528 new_info->known_csts = known_csts;
5529 new_info->known_contexts = known_contexts;
5531 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5532 aggvals);
5534 return new_node;
5537 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5538 pass-through function to itself when the cgraph_node involved is not an
5539 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5540 no-operation pass-through. */
5542 static bool
5543 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5544 bool simple = true)
5546 enum availability availability;
5547 if (cs->caller == cs->callee->function_symbol (&availability)
5548 && availability > AVAIL_INTERPOSABLE
5549 && jfunc->type == IPA_JF_PASS_THROUGH
5550 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5551 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5552 && ipa_node_params_sum->get (cs->caller)
5553 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5554 return true;
5555 return false;
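/* For instance (hypothetical function): in

     static int fn (int i) { ... fn (i); ... }

   the jump function describing the argument of the recursive call is a
   simple pass-through of parameter 0, so this predicate returns true for it
   provided fn is not interposable and is not itself an IPA-CP clone.  */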
5558 /* Return true if JFUNC, which describes a part of an aggregate represented or
5559 pointed to by the i-th parameter of call CS, is a pass-through function to
5560 itself when the cgraph_node involved is not an IPA-CP clone. When
5561 SIMPLE is true, further check if JFUNC is a simple no-operation
5562 pass-through. */
5564 static bool
5565 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5566 const ipa_agg_jf_item *jfunc,
5567 int i, bool simple = true)
5569 enum availability availability;
5570 if (cs->caller == cs->callee->function_symbol (&availability)
5571 && availability > AVAIL_INTERPOSABLE
5572 && jfunc->jftype == IPA_JF_LOAD_AGG
5573 && jfunc->offset == jfunc->value.load_agg.offset
5574 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5575 && jfunc->value.pass_through.formal_id == i
5576 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5577 && ipa_node_params_sum->get (cs->caller)
5578 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5579 return true;
5580 return false;
5583 /* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
5584 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
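/* For example (made-up callers, for illustration only): if the considered
   subset consists of two call sites

     foo (7, x);
     foo (7, y);

   and KNOWN_CSTS[0] of foo is still blank, all callers in the subset agree
   on the constant 7 for the first parameter and the slot can be filled in
   by the loop below.  */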
5586 static void
5587 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5588 vec<tree> &known_csts,
5589 const vec<cgraph_edge *> &callers)
5591 ipa_node_params *info = ipa_node_params_sum->get (node);
5592 int i, count = ipa_get_param_count (info);
5594 for (i = 0; i < count; i++)
5596 struct cgraph_edge *cs;
5597 tree newval = NULL_TREE;
5598 int j;
5599 bool first = true;
5600 tree type = ipa_get_type (info, i);
5602 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5603 continue;
5605 FOR_EACH_VEC_ELT (callers, j, cs)
5607 struct ipa_jump_func *jump_func;
5608 tree t;
5610 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5611 if (!args
5612 || i >= ipa_get_cs_argument_count (args)
5613 || (i == 0
5614 && call_passes_through_thunk (cs)))
5616 newval = NULL_TREE;
5617 break;
5619 jump_func = ipa_get_ith_jump_func (args, i);
5621 /* Besides a simple pass-through jump function, an arithmetic jump
5622 function can also introduce an argument-direct-pass-through for a
5623 self-feeding recursive call. For example,
5625 fn (int i)
5626   {
5627     fn (i & 1);
5628   }
5630 Given that i is 0, recursive propagation via (i & 1) also gets
5631 0. */
5632 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5634 gcc_assert (newval);
5635 t = ipa_get_jf_arith_result (
5636 ipa_get_jf_pass_through_operation (jump_func),
5637 newval,
5638 ipa_get_jf_pass_through_operand (jump_func),
5639 type);
5641 else
5642 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5643 jump_func, type);
5644 if (!t
5645 || (newval
5646 && !values_equal_for_ipcp_p (t, newval))
5647 || (!first && !newval))
5649 newval = NULL_TREE;
5650 break;
5652 else
5653 newval = t;
5654 first = false;
5657 if (newval)
5659 if (dump_file && (dump_flags & TDF_DETAILS))
5661 fprintf (dump_file, " adding an extra known scalar value ");
5662 print_ipcp_constant_value (dump_file, newval);
5663 fprintf (dump_file, " for ");
5664 ipa_dump_param (dump_file, info, i);
5665 fprintf (dump_file, "\n");
5668 known_csts[i] = newval;
5673 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5674 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5675 CALLERS. */
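/* Illustration (hypothetical code): if every caller in the subset looks
   like

     void caller ()
     {
       A a;	// dynamic type of &a known to be exactly A
       fn (&a);
     }

   then the meet of the per-edge contexts still describes type A and the
   blank slot can be filled; a single caller contributing a useless context
   makes the meet useless too.  */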
5677 static void
5678 find_more_contexts_for_caller_subset (cgraph_node *node,
5679 vec<ipa_polymorphic_call_context>
5680 *known_contexts,
5681 const vec<cgraph_edge *> &callers)
5683 ipa_node_params *info = ipa_node_params_sum->get (node);
5684 int i, count = ipa_get_param_count (info);
5686 for (i = 0; i < count; i++)
5688 cgraph_edge *cs;
5690 if (ipa_get_poly_ctx_lat (info, i)->bottom
5691 || (known_contexts->exists ()
5692 && !(*known_contexts)[i].useless_p ()))
5693 continue;
5695 ipa_polymorphic_call_context newval;
5696 bool first = true;
5697 int j;
5699 FOR_EACH_VEC_ELT (callers, j, cs)
5701 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5702 if (!args
5703 || i >= ipa_get_cs_argument_count (args))
5704 return;
5705 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5706 ipa_polymorphic_call_context ctx;
5707 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5708 cs, i, jfunc);
5709 if (first)
5711 newval = ctx;
5712 first = false;
5714 else
5715 newval.meet_with (ctx);
5716 if (newval.useless_p ())
5717 break;
5720 if (!newval.useless_p ())
5722 if (dump_file && (dump_flags & TDF_DETAILS))
5724 fprintf (dump_file, " adding an extra known polymorphic "
5725 "context ");
5726 print_ipcp_constant_value (dump_file, newval);
5727 fprintf (dump_file, " for ");
5728 ipa_dump_param (dump_file, info, i);
5729 fprintf (dump_file, "\n");
5732 if (!known_contexts->exists ())
5733 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5734 true);
5735 (*known_contexts)[i] = newval;
5741 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5742 RES. If INTERIM is non-NULL, it contains the current interim state of
5743 collected aggregate values which can be used to compute values passed over
5744 self-recursive edges.
5746 This is basically one iteration of push_agg_values_from_edge over one
5747 parameter, which allows for simpler early returns. */
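/* For instance (illustrative numbers): for an IPA_JF_ANCESTOR jump
   function with agg_preserved and a 32-bit ancestor offset, the known
   aggregate values of the caller's parameter SRC_IDX are re-pushed for
   the callee with their unit offsets adjusted by
   unit_delta = 32 / BITS_PER_UNIT = 4.  */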
5749 static void
5750 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5751 vec<ipa_argagg_value> *res,
5752 const ipa_argagg_value_list *interim)
5754 bool agg_values_from_caller = false;
5755 bool agg_jf_preserved = false;
5756 unsigned unit_delta = UINT_MAX;
5757 int src_idx = -1;
5758 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5759 index);
5761 if (jfunc->type == IPA_JF_PASS_THROUGH
5762 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5764 agg_values_from_caller = true;
5765 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5766 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5767 unit_delta = 0;
5769 else if (jfunc->type == IPA_JF_ANCESTOR
5770 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5772 agg_values_from_caller = true;
5773 agg_jf_preserved = true;
5774 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5775 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5778 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5779 if (agg_values_from_caller)
5781 if (caller_info->ipcp_orig_node)
5783 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5784 ipcp_transformation *ts
5785 = ipcp_get_transformation_summary (cs->caller);
5786 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5787 ipcp_param_lattices *orig_plats
5788 = ipa_get_parm_lattices (orig_info, src_idx);
5789 if (ts
5790 && orig_plats->aggs
5791 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5793 ipa_argagg_value_list src (ts);
5794 src.push_adjusted_values (src_idx, index, unit_delta, res);
5795 return;
5798 else
5800 ipcp_param_lattices *src_plats
5801 = ipa_get_parm_lattices (caller_info, src_idx);
5802 if (src_plats->aggs
5803 && !src_plats->aggs_bottom
5804 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5806 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5808 interim->push_adjusted_values (src_idx, index, unit_delta,
5809 res);
5810 return;
5812 if (!src_plats->aggs_contain_variable)
5814 push_agg_values_from_plats (src_plats, index, unit_delta,
5815 res);
5816 return;
5822 if (!jfunc->agg.items)
5823 return;
5824 bool first = true;
5825 unsigned prev_unit_offset = 0;
5826 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5828 tree value, srcvalue;
5829 /* Besides a simple pass-through aggregate jump function, an arithmetic
5830 aggregate jump function can also bring the same aggregate value as the
5831 parameter passed in for a self-feeding recursive call. For example,
5833 fn (int *i)
5834   {
5835     int j = *i & 1;
5836     fn (&j);
5837   }
5839 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5840 if (interim
5841 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5842 && (srcvalue = interim->get_value(index,
5843 agg_jf.offset / BITS_PER_UNIT)))
5844 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5845 srcvalue,
5846 agg_jf.value.pass_through.operand,
5847 agg_jf.type);
5848 else
5849 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5850 &agg_jf);
5851 if (value)
5853 struct ipa_argagg_value iav;
5854 iav.value = value;
5855 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5856 iav.index = index;
5857 iav.by_ref = jfunc->agg.by_ref;
5858 iav.killed = false;
5860 gcc_assert (first
5861 || iav.unit_offset > prev_unit_offset);
5862 prev_unit_offset = iav.unit_offset;
5863 first = false;
5865 res->safe_push (iav);
5868 return;
5871 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5872 description of the ultimate callee of CS or of the node it was cloned from
5873 (the summary where the lattices are). If INTERIM is non-NULL, it contains
5874 the current interim state of collected aggregate values which can be used
5875 to compute values passed over self-recursive edges (if
5876 OPTIMIZE_SELF_RECURSION is true) and to skip values which clearly will not
5877 be part of the intersection with INTERIM. */
5879 static void
5880 push_agg_values_from_edge (struct cgraph_edge *cs,
5881 ipa_node_params *dest_info,
5882 vec<ipa_argagg_value> *res,
5883 const ipa_argagg_value_list *interim,
5884 bool optimize_self_recursion)
5886 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5887 if (!args)
5888 return;
5890 int count = MIN (ipa_get_param_count (dest_info),
5891 ipa_get_cs_argument_count (args));
5893 unsigned interim_index = 0;
5894 for (int index = 0; index < count; index++)
5896 if (interim)
5898 while (interim_index < interim->m_elts.size ()
5899 && interim->m_elts[interim_index].value
5900 && interim->m_elts[interim_index].index < index)
5901 interim_index++;
5902 if (interim_index >= interim->m_elts.size ()
5903 || interim->m_elts[interim_index].index > index)
5904 continue;
5907 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5908 if (!ipa_is_param_used (dest_info, index)
5909 || plats->aggs_bottom)
5910 continue;
5911 push_agg_values_for_index_from_edge (cs, index, res,
5912 optimize_self_recursion ? interim
5913 : NULL);
5918 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5919 from all of them. Return nullptr if there are none. */
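/* A worked example (made-up values): if the first caller brings
   { index 0, unit_offset 0: 1 } and { index 0, unit_offset 4: 2 } while a
   second caller brings only { index 0, unit_offset 0: 1 }, the
   intersection computed below keeps just { index 0, unit_offset 0: 1 }.  */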
5921 static struct vec<ipa_argagg_value, va_gc> *
5922 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5923 const vec<cgraph_edge *> &callers)
5925 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5926 if (dest_info->ipcp_orig_node)
5927 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5929 /* gather_edges_for_value puts a non-recursive call into the first element of
5930 callers if it can. */
5931 auto_vec<ipa_argagg_value, 32> interim;
5932 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5934 unsigned valid_entries = interim.length ();
5935 if (!valid_entries)
5936 return nullptr;
5938 unsigned caller_count = callers.length();
5939 for (unsigned i = 1; i < caller_count; i++)
5941 auto_vec<ipa_argagg_value, 32> last;
5942 ipa_argagg_value_list avs (&interim);
5943 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5945 valid_entries = intersect_argaggs_with (interim, last);
5946 if (!valid_entries)
5947 return nullptr;
5950 vec<ipa_argagg_value, va_gc> *res = NULL;
5951 vec_safe_reserve_exact (res, valid_entries);
5952 for (const ipa_argagg_value &av : interim)
5953 if (av.value)
5954 res->quick_push(av);
5955 gcc_checking_assert (res->length () == valid_entries);
5956 return res;
5959 /* Determine whether CS also brings all scalar values that the NODE is
5960 specialized for. */
5962 static bool
5963 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5964 struct cgraph_node *node)
5966 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5967 int count = ipa_get_param_count (dest_info);
5968 class ipa_node_params *caller_info;
5969 class ipa_edge_args *args;
5970 int i;
5972 caller_info = ipa_node_params_sum->get (cs->caller);
5973 args = ipa_edge_args_sum->get (cs);
5974 for (i = 0; i < count; i++)
5976 struct ipa_jump_func *jump_func;
5977 tree val, t;
5979 val = dest_info->known_csts[i];
5980 if (!val)
5981 continue;
5983 if (i >= ipa_get_cs_argument_count (args))
5984 return false;
5985 jump_func = ipa_get_ith_jump_func (args, i);
5986 t = ipa_value_from_jfunc (caller_info, jump_func,
5987 ipa_get_type (dest_info, i));
5988 if (!t || !values_equal_for_ipcp_p (val, t))
5989 return false;
5991 return true;
5994 /* Determine whether CS also brings all aggregate values that NODE is
5995 specialized for. */
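/* For example (made-up values): if NODE was specialized for the aggregate
   replacement { index 0, unit_offset 0: 1 } and CS brings both
   { index 0, unit_offset 0: 1 } and { index 0, unit_offset 4: 7 }, the
   edge values form a superset of the existing ones and the function
   returns true.  */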
5997 static bool
5998 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5999 struct cgraph_node *node)
6001 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
6002 if (!ts || vec_safe_is_empty (ts->m_agg_values))
6003 return true;
6005 const ipa_argagg_value_list existing (ts->m_agg_values);
6006 auto_vec<ipa_argagg_value, 32> edge_values;
6007 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
6008 gcc_checking_assert (dest_info->ipcp_orig_node);
6009 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
6010 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
6011 const ipa_argagg_value_list avl (&edge_values);
6012 return avl.superset_of_p (existing);
6015 /* Given an original NODE and a VAL for which we have already created a
6016 specialized clone, look whether there are incoming edges that still lead
6017 into the old node but now also bring the requested value and also conform to
6018 all other criteria such that they can be redirected to the special node.
6019 This function can therefore redirect the final edge in a SCC. */
6021 template <typename valtype>
6022 static void
6023 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
6025 ipcp_value_source<valtype> *src;
6026 profile_count redirected_sum = profile_count::zero ();
6028 for (src = val->sources; src; src = src->next)
6030 struct cgraph_edge *cs = src->cs;
6031 while (cs)
6033 if (cgraph_edge_brings_value_p (cs, src, node, val)
6034 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
6035 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
6037 if (dump_file)
6038 fprintf (dump_file, " - adding an extra caller %s of %s\n",
6039 cs->caller->dump_name (),
6040 val->spec_node->dump_name ());
6042 cs->redirect_callee_duplicating_thunks (val->spec_node);
6043 val->spec_node->expand_all_artificial_thunks ();
6044 if (cs->count.ipa ().initialized_p ())
6045 redirected_sum = redirected_sum + cs->count.ipa ();
6047 cs = get_next_cgraph_edge_clone (cs);
6051 if (redirected_sum.nonzero_p ())
6052 update_specialized_profile (val->spec_node, node, redirected_sum);
6055 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
6057 static bool
6058 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
6060 ipa_polymorphic_call_context *ctx;
6061 int i;
6063 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
6064 if (!ctx->useless_p ())
6065 return true;
6066 return false;
6069 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */
6071 static vec<ipa_polymorphic_call_context>
6072 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
6074 if (known_contexts_useful_p (known_contexts))
6075 return known_contexts.copy ();
6076 else
6077 return vNULL;
6080 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6081 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
6082 copy too. */
6084 static void
6085 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6086 vec<tree> *known_csts,
6087 vec<ipa_polymorphic_call_context> *known_contexts,
6088 ipcp_value<tree> *val, int index)
6090 *known_csts = avals->m_known_vals.copy ();
6091 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6092 (*known_csts)[index] = val->value;
6095 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6096 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6097 INDEX. */
6099 static void
6100 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6101 vec<tree> *known_csts,
6102 vec<ipa_polymorphic_call_context> *known_contexts,
6103 ipcp_value<ipa_polymorphic_call_context> *val,
6104 int index)
6106 *known_csts = avals->m_known_vals.copy ();
6107 *known_contexts = avals->m_known_contexts.copy ();
6108 (*known_contexts)[index] = val->value;
6111 /* Return true if OFFSET indicates this was not an aggregate value or there is
6112 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6113 AGGVALS list. */
6115 DEBUG_FUNCTION bool
6116 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6117 int index, HOST_WIDE_INT offset, tree value)
6119 if (offset == -1)
6120 return true;
6122 const ipa_argagg_value_list avl (aggvals);
6123 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6124 return v && values_equal_for_ipcp_p (v, value);
6127 /* Return true if OFFSET is minus one, because the source of a polymorphic
6128 context cannot be an aggregate value. */
6130 DEBUG_FUNCTION bool
6131 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6132 int , HOST_WIDE_INT offset,
6133 ipa_polymorphic_call_context)
6135 return offset == -1;
6138 /* Decide whether to create a special version of NODE for value VAL of
6139 parameter at the given INDEX. If OFFSET is -1, the value is for the
6140 parameter itself, otherwise it is stored at the given OFFSET of the
6141 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6142 is a vector which contains clones created for self-recursive calls with an
6143 arithmetic pass-through jump function. */
6145 template <typename valtype>
6146 static bool
6147 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6148 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6149 vec<cgraph_node *> *self_gen_clones)
6151 int caller_count;
6152 sreal freq_sum;
6153 profile_count count_sum, rec_count_sum;
6154 vec<cgraph_edge *> callers;
6156 if (val->spec_node)
6158 perhaps_add_new_callers (node, val);
6159 return false;
6161 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6163 if (dump_file && (dump_flags & TDF_DETAILS))
6164 fprintf (dump_file, " Ignoring candidate value because "
6165 "maximum unit size would be reached with %li.\n",
6166 val->local_size_cost + overall_size);
6167 return false;
6169 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6170 &rec_count_sum, &count_sum))
6171 return false;
6173 if (!dbg_cnt (ipa_cp_values))
6174 return false;
6176 if (val->self_recursion_generated_p ())
6178 /* The edge counts in this case might not have been adjusted yet.
6179 Nevertheless, even if they had been, it would only be guesswork which we
6180 can just as well do now. The recursive part of the counts can be derived
6181 from the original node's count anyway. */
6182 if (node->count.ipa ().nonzero_p ())
6184 unsigned dem = self_gen_clones->length () + 1;
6185 rec_count_sum = node->count.ipa () / dem;
6187 else
6188 rec_count_sum = profile_count::zero ();
6191 /* get_info_about_necessary_edges only sums up ipa counts. */
6192 count_sum += rec_count_sum;
6194 if (dump_file && (dump_flags & TDF_DETAILS))
6196 fprintf (dump_file, " - considering value ");
6197 print_ipcp_constant_value (dump_file, val->value);
6198 fprintf (dump_file, " for ");
6199 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6200 if (offset != -1)
6201 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6202 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6205 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6206 freq_sum, count_sum,
6207 val->local_size_cost)
6208 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6209 freq_sum, count_sum, val->prop_size_cost))
6210 return false;
6212 if (dump_file)
6213 fprintf (dump_file, " Creating a specialized node of %s.\n",
6214 node->dump_name ());
6216 vec<tree> known_csts;
6217 vec<ipa_polymorphic_call_context> known_contexts;
6219 callers = gather_edges_for_value (val, node, caller_count);
6220 if (offset == -1)
6221 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6222 else
6224 known_csts = avals->m_known_vals.copy ();
6225 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6227 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6228 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6229 vec<ipa_argagg_value, va_gc> *aggvals
6230 = find_aggregate_values_for_callers_subset (node, callers);
6231 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6232 offset, val->value));
6233 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6234 aggvals, callers);
6236 if (val->self_recursion_generated_p ())
6237 self_gen_clones->safe_push (val->spec_node);
6238 else
6239 update_profiling_info (node, val->spec_node);
6241 callers.release ();
6242 overall_size += val->local_size_cost;
6243 if (dump_file && (dump_flags & TDF_DETAILS))
6244 fprintf (dump_file, " overall size reached %li\n",
6245 overall_size);
6247 /* TODO: If for some lattice there is only one other known value
6248 left, make a special node for it too. */
6250 return true;
6253 /* Like irange::contains_p(), but convert VAL to the range of R if
6254 necessary. */
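/* E.g. (illustrative): testing an INTEGER_CST 5 of type int against a
   range of type unsigned char first converts the constant to unsigned
   char; a constant that does not even fit into the range's type, say 300
   for unsigned char, makes the function return false right away.  */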
6256 static inline bool
6257 ipa_range_contains_p (const vrange &r, tree val)
6259 if (r.undefined_p ())
6260 return false;
6262 tree type = r.type ();
6263 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6264 return false;
6266 val = fold_convert (type, val);
6267 return r.contains_p (val);
6270 /* Decide whether and what specialized clones of NODE should be created. */
6272 static bool
6273 decide_whether_version_node (struct cgraph_node *node)
6275 ipa_node_params *info = ipa_node_params_sum->get (node);
6276 int i, count = ipa_get_param_count (info);
6277 bool ret = false;
6279 if (count == 0)
6280 return false;
6282 if (dump_file && (dump_flags & TDF_DETAILS))
6283 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6284 node->dump_name ());
6286 auto_vec <cgraph_node *, 9> self_gen_clones;
6287 ipa_auto_call_arg_values avals;
6288 gather_context_independent_values (info, &avals, false, NULL);
6290 for (i = 0; i < count;i++)
6292 if (!ipa_is_param_used (info, i))
6293 continue;
6295 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6296 ipcp_lattice<tree> *lat = &plats->itself;
6297 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6299 if (!lat->bottom
6300 && !avals.m_known_vals[i])
6302 ipcp_value<tree> *val;
6303 for (val = lat->values; val; val = val->next)
6305 /* If some values generated for self-recursive calls with
6306 arithmetic jump functions fall outside of the known
6307 range for the parameter, we can skip them. */
6308 if (TREE_CODE (val->value) == INTEGER_CST
6309 && !plats->m_value_range.bottom_p ()
6310 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6311 val->value))
6313 /* This can happen also if a constant present in the source
6314 code falls outside of the range of the parameter's type, so we
6315 cannot assert. */
6316 if (dump_file && (dump_flags & TDF_DETAILS))
6318 fprintf (dump_file, " - skipping%s value ",
6319 val->self_recursion_generated_p ()
6320 ? " self_recursion_generated" : "");
6321 print_ipcp_constant_value (dump_file, val->value);
6322 fprintf (dump_file, " because it is outside known "
6323 "value range.\n");
6325 continue;
6327 ret |= decide_about_value (node, i, -1, val, &avals,
6328 &self_gen_clones);
6332 if (!plats->aggs_bottom)
6334 struct ipcp_agg_lattice *aglat;
6335 ipcp_value<tree> *val;
6336 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6337 if (!aglat->bottom && aglat->values
6338 /* If the following is false, the one value has been considered
6339 for cloning for all contexts. */
6340 && (plats->aggs_contain_variable
6341 || !aglat->is_single_const ()))
6342 for (val = aglat->values; val; val = val->next)
6343 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6344 &self_gen_clones);
6347 if (!ctxlat->bottom
6348 && avals.m_known_contexts[i].useless_p ())
6350 ipcp_value<ipa_polymorphic_call_context> *val;
6351 for (val = ctxlat->values; val; val = val->next)
6352 ret |= decide_about_value (node, i, -1, val, &avals,
6353 &self_gen_clones);
6357 if (!self_gen_clones.is_empty ())
6359 self_gen_clones.safe_push (node);
6360 update_counts_for_self_gen_clones (node, self_gen_clones);
6363 if (info->do_clone_for_all_contexts)
6365 if (!dbg_cnt (ipa_cp_values))
6367 info->do_clone_for_all_contexts = false;
6368 return ret;
6371 struct cgraph_node *clone;
6372 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6374 for (int i = callers.length () - 1; i >= 0; i--)
6376 cgraph_edge *cs = callers[i];
6377 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6379 if (caller_info && caller_info->node_dead)
6380 callers.unordered_remove (i);
6383 if (!adjust_callers_for_value_intersection (callers, node))
6385 /* If the node is not called by anyone, or all its caller edges are
6386 self-recursive, the node is not really in use, so there is no need
6387 to do cloning. */
6388 info->do_clone_for_all_contexts = false;
6389 return ret;
6392 if (dump_file)
6393 fprintf (dump_file, " - Creating a specialized node of %s "
6394 "for all known contexts.\n", node->dump_name ());
6396 vec<tree> known_csts = avals.m_known_vals.copy ();
6397 vec<ipa_polymorphic_call_context> known_contexts
6398 = copy_useful_known_contexts (avals.m_known_contexts);
6399 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6400 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6401 vec<ipa_argagg_value, va_gc> *aggvals
6402 = find_aggregate_values_for_callers_subset (node, callers);
6404 if (!known_contexts_useful_p (known_contexts))
6406 known_contexts.release ();
6407 known_contexts = vNULL;
6409 clone = create_specialized_node (node, known_csts, known_contexts,
6410 aggvals, callers);
6411 info->do_clone_for_all_contexts = false;
6412 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6413 ret = true;
6416 return ret;
6419 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6421 static void
6422 spread_undeadness (struct cgraph_node *node)
6424 struct cgraph_edge *cs;
6426 for (cs = node->callees; cs; cs = cs->next_callee)
6427 if (ipa_edge_within_scc (cs))
6429 struct cgraph_node *callee;
6430 class ipa_node_params *info;
6432 callee = cs->callee->function_symbol (NULL);
6433 info = ipa_node_params_sum->get (callee);
6435 if (info && info->node_dead)
6437 info->node_dead = 0;
6438 spread_undeadness (callee);
6443 /* Return true if NODE has a caller from outside of its SCC that is not
6444 dead. Worker callback for cgraph_for_node_and_aliases. */
6446 static bool
6447 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6448 void *data ATTRIBUTE_UNUSED)
6450 struct cgraph_edge *cs;
6452 for (cs = node->callers; cs; cs = cs->next_caller)
6453 if (cs->caller->thunk
6454 && cs->caller->call_for_symbol_thunks_and_aliases
6455 (has_undead_caller_from_outside_scc_p, NULL, true))
6456 return true;
6457 else if (!ipa_edge_within_scc (cs))
6459 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6460 if (!caller_info /* Unoptimized callers are like dead ones. */
6461 || !caller_info->node_dead)
6462 return true;
6464 return false;
6468 /* Identify nodes within the same SCC as NODE which are no longer needed
6469 because of new clones and will be removed as unreachable. */
6471 static void
6472 identify_dead_nodes (struct cgraph_node *node)
6474 struct cgraph_node *v;
6475 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6476 if (v->local)
6478 ipa_node_params *info = ipa_node_params_sum->get (v);
6479 if (info
6480 && !v->call_for_symbol_thunks_and_aliases
6481 (has_undead_caller_from_outside_scc_p, NULL, true))
6482 info->node_dead = 1;
6485 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6487 ipa_node_params *info = ipa_node_params_sum->get (v);
6488 if (info && !info->node_dead)
6489 spread_undeadness (v);
6492 if (dump_file && (dump_flags & TDF_DETAILS))
6494 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6495 if (ipa_node_params_sum->get (v)
6496 && ipa_node_params_sum->get (v)->node_dead)
6497 fprintf (dump_file, " Marking node as dead: %s.\n",
6498 v->dump_name ());
6502 /* The decision stage. Iterate over the topological order of call graph nodes
6503 TOPO and make specialized clones if deemed beneficial. */
6505 static void
6506 ipcp_decision_stage (class ipa_topo_info *topo)
6508 int i;
6510 if (dump_file)
6511 fprintf (dump_file, "\nIPA decision stage:\n\n");
6513 for (i = topo->nnodes - 1; i >= 0; i--)
6515 struct cgraph_node *node = topo->order[i];
6516 bool change = false, iterate = true;
6518 while (iterate)
6520 struct cgraph_node *v;
6521 iterate = false;
6522 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6523 if (v->has_gimple_body_p ()
6524 && ipcp_versionable_function_p (v))
6525 iterate |= decide_whether_version_node (v);
6527 change |= iterate;
6529 if (change)
6530 identify_dead_nodes (node);
6534 /* Look up all VR and bits information that we have discovered and copy it
6535 over to the transformation summary. */
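/* A worked example (made-up numbers): suppose that for an int parameter
   the range lattice holds [0, 100] and the bits lattice holds value 0x0
   with mask 0xfffffffe, i.e. only the least significant bit is known and
   it is zero.  The loop below turns the pair into an irange_bitmask,
   applies it with update_bitmask, and the stored ipa_vr then describes an
   even number in [0, 100].  */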
6537 static void
6538 ipcp_store_vr_results (void)
6540 cgraph_node *node;
6542 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6544 ipa_node_params *info = ipa_node_params_sum->get (node);
6545 bool dumped_sth = false;
6546 bool found_useful_result = false;
6547 bool do_vr = true;
6548 bool do_bits = true;
6550 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6552 if (dump_file)
6553 fprintf (dump_file, "Not considering %s for VR discovery "
6554 "and propagation; -fipa-vrp is disabled.\n",
6555 node->dump_name ());
6556 do_vr = false;
6558 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6560 if (dump_file)
6561 fprintf (dump_file, "Not considering %s for ipa bitwise "
6562 "propagation; -fipa-bit-cp is disabled.\n",
6563 node->dump_name ());
6564 do_bits = false;
6566 if (!do_bits && !do_vr)
6567 continue;
6569 if (info->ipcp_orig_node)
6570 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6571 if (!info->lattices)
6572 /* Newly expanded artificial thunks do not have lattices. */
6573 continue;
6575 unsigned count = ipa_get_param_count (info);
6576 for (unsigned i = 0; i < count; i++)
6578 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6579 if (do_vr
6580 && !plats->m_value_range.bottom_p ()
6581 && !plats->m_value_range.top_p ())
6583 found_useful_result = true;
6584 break;
6586 if (do_bits && plats->bits_lattice.constant_p ())
6588 found_useful_result = true;
6589 break;
6592 if (!found_useful_result)
6593 continue;
6595 ipcp_transformation_initialize ();
6596 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6597 vec_safe_reserve_exact (ts->m_vr, count);
6599 for (unsigned i = 0; i < count; i++)
6601 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6602 ipcp_bits_lattice *bits = NULL;
6604 if (do_bits
6605 && plats->bits_lattice.constant_p ()
6606 && dbg_cnt (ipa_cp_bits))
6607 bits = &plats->bits_lattice;
6609 if (do_vr
6610 && !plats->m_value_range.bottom_p ()
6611 && !plats->m_value_range.top_p ()
6612 && dbg_cnt (ipa_cp_vr))
6614 if (bits)
6616 Value_Range tmp = plats->m_value_range.m_vr;
6617 tree type = ipa_get_type (info, i);
6618 irange &r = as_a<irange> (tmp);
6619 irange_bitmask bm (wide_int::from (bits->get_value (),
6620 TYPE_PRECISION (type),
6621 TYPE_SIGN (type)),
6622 wide_int::from (bits->get_mask (),
6623 TYPE_PRECISION (type),
6624 TYPE_SIGN (type)));
6625 r.update_bitmask (bm);
6626 ipa_vr vr (tmp);
6627 ts->m_vr->quick_push (vr);
6629 else
6631 ipa_vr vr (plats->m_value_range.m_vr);
6632 ts->m_vr->quick_push (vr);
6635 else if (bits)
6637 tree type = ipa_get_type (info, i);
6638 Value_Range tmp;
6639 tmp.set_varying (type);
6640 irange &r = as_a<irange> (tmp);
6641 irange_bitmask bm (wide_int::from (bits->get_value (),
6642 TYPE_PRECISION (type),
6643 TYPE_SIGN (type)),
6644 wide_int::from (bits->get_mask (),
6645 TYPE_PRECISION (type),
6646 TYPE_SIGN (type)));
6647 r.update_bitmask (bm);
6648 ipa_vr vr (tmp);
6649 ts->m_vr->quick_push (vr);
6651 else
6653 ipa_vr vr;
6654 ts->m_vr->quick_push (vr);
6657 if (!dump_file || !bits)
6658 continue;
6660 if (!dumped_sth)
6662 fprintf (dump_file, "Propagated bits info for function %s:\n",
6663 node->dump_name ());
6664 dumped_sth = true;
6666 fprintf (dump_file, " param %i: value = ", i);
6667 print_hex (bits->get_value (), dump_file);
6668 fprintf (dump_file, ", mask = ");
6669 print_hex (bits->get_mask (), dump_file);
6670 fprintf (dump_file, "\n");
6675 /* The IPCP driver. */
6677 static unsigned int
6678 ipcp_driver (void)
6680 class ipa_topo_info topo;
6682 if (edge_clone_summaries == NULL)
6683 edge_clone_summaries = new edge_clone_summary_t (symtab);
6685 ipa_check_create_node_params ();
6686 ipa_check_create_edge_args ();
6687 clone_num_suffixes = new hash_map<const char *, unsigned>;
6689 if (dump_file)
6691 fprintf (dump_file, "\nIPA structures before propagation:\n");
6692 if (dump_flags & TDF_DETAILS)
6693 ipa_print_all_params (dump_file);
6694 ipa_print_all_jump_functions (dump_file);
6697 /* Topological sort. */
6698 build_toporder_info (&topo);
6699 /* Do the interprocedural propagation. */
6700 ipcp_propagate_stage (&topo);
6701 /* Decide what constant propagation and cloning should be performed. */
6702 ipcp_decision_stage (&topo);
6703 /* Store results of value range and bits propagation. */
6704 ipcp_store_vr_results ();
6706 /* Free all IPCP structures. */
6707 delete clone_num_suffixes;
6708 free_toporder_info (&topo);
6709 delete edge_clone_summaries;
6710 edge_clone_summaries = NULL;
6711 ipa_free_all_structures_after_ipa_cp ();
6712 if (dump_file)
6713 fprintf (dump_file, "\nIPA constant propagation end\n");
6714 return 0;
6717 /* Initialization and computation of IPCP data structures. This is the initial
6718 intraprocedural analysis of functions, which gathers information to be
6719 propagated later on. */
6721 static void
6722 ipcp_generate_summary (void)
6724 struct cgraph_node *node;
6726 if (dump_file)
6727 fprintf (dump_file, "\nIPA constant propagation start:\n");
6728 ipa_register_cgraph_hooks ();
6730 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6731 ipa_analyze_node (node);
6734 namespace {
6736 const pass_data pass_data_ipa_cp =
6738 IPA_PASS, /* type */
6739 "cp", /* name */
6740 OPTGROUP_NONE, /* optinfo_flags */
6741 TV_IPA_CONSTANT_PROP, /* tv_id */
6742 0, /* properties_required */
6743 0, /* properties_provided */
6744 0, /* properties_destroyed */
6745 0, /* todo_flags_start */
6746 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6749 class pass_ipa_cp : public ipa_opt_pass_d
6751 public:
6752 pass_ipa_cp (gcc::context *ctxt)
6753 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6754 ipcp_generate_summary, /* generate_summary */
6755 NULL, /* write_summary */
6756 NULL, /* read_summary */
6757 ipcp_write_transformation_summaries, /*
6758 write_optimization_summary */
6759 ipcp_read_transformation_summaries, /*
6760 read_optimization_summary */
6761 NULL, /* stmt_fixup */
6762 0, /* function_transform_todo_flags_start */
6763 ipcp_transform_function, /* function_transform */
6764 NULL) /* variable_transform */
6767 /* opt_pass methods: */
6768 bool gate (function *) final override
6770 /* FIXME: We should remove the optimize check after we ensure we never run
6771 IPA passes when not optimizing. */
6772 return (flag_ipa_cp && optimize) || in_lto_p;
6775 unsigned int execute (function *) final override { return ipcp_driver (); }
6777 }; // class pass_ipa_cp
6779 } // anon namespace
6781 ipa_opt_pass_d *
6782 make_pass_ipa_cp (gcc::context *ctxt)
6784 return new pass_ipa_cp (ctxt);
6787 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6788 within the same process. For use by toplev::finalize. */
6790 void
6791 ipa_cp_cc_finalize (void)
6793 base_count = profile_count::uninitialized ();
6794 overall_size = 0;
6795 orig_overall_size = 0;
6796 ipcp_free_transformation_sum ();
6799 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6800 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6801 DECL_UID-sorted vector if available (which is pre-computed only if there are
6802 many parameters). Can return -1 if PARAM is the static chain, which is
6803 not represented among DECL_ARGUMENTS. */
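/* Illustration (hypothetical function): for

     void fn (int a, int b);

   the PARM_DECL of a maps to index 0 and that of b to index 1, found
   either via the DECL_UID-sorted m_uid_to_idx vector or, for functions
   with few parameters, by walking the DECL_ARGUMENTS chain below.  */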
6805 int
6806 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6808 gcc_assert (TREE_CODE (param) == PARM_DECL);
6809 if (m_uid_to_idx)
6811 unsigned puid = DECL_UID (param);
6812 const ipa_uid_to_idx_map_elt *res
6813 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6814 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6816 return elt.uid < uid;
6818 if (res == m_uid_to_idx->end ()
6819 || res->uid != puid)
6821 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6822 return -1;
6824 return res->index;
6827 unsigned index = 0;
6828 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6829 if (p == param)
6830 return (int) index;
6832 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6833 return -1;
6836 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6837 according to the uid. */
6839 static int
6840 compare_uids (const void *a, const void *b)
6842 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6843 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6844 if (e1->uid < e2->uid)
6845 return -1;
6846 if (e1->uid > e2->uid)
6847 return 1;
6848 gcc_unreachable ();
6851 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6852 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6853 from parameters to their indices in DECL_ARGUMENTS chain. */
6855 void
6856 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6858 int c = count_formal_params (fndecl);
6859 if (c < 32)
6860 return;
6862 m_uid_to_idx = NULL;
6863 vec_safe_reserve (m_uid_to_idx, c, true);
6864 unsigned index = 0;
6865 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6867 ipa_uid_to_idx_map_elt elt;
6868 elt.uid = DECL_UID (p);
6869 elt.index = index;
6870 m_uid_to_idx->quick_push (elt);
6872 m_uid_to_idx->qsort (compare_uids);