[official-gcc.git] / gcc / ipa-cp.cc
/* Interprocedural constant propagation
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size growth is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.
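   For example (an editorial illustration, not code taken from this file),
   given

     int bar (int a, int b);
     int foo (int c) { return bar (c + 1, 7); }

   the call to bar would get a pass-through jump function with operation
   PLUS_EXPR for its first argument and a constant jump function holding 7
   for its second argument.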
   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the original paper by Callahan et al.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */
#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"
template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  Value_Range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const vrange &p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init (tree type);
  void print (FILE * f);

private:
  bool meet_with_1 (const vrange &other_vr);
};

inline void
ipcp_vr_lattice::init (tree type)
{
  if (type)
    m_vr.set_type (type);

  // Otherwise m_vr will default to unsupported_range.
}
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic call context of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}
/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
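/* An editorial illustration of the CONST_DECL special case above (not code
   from the original sources): two addresses of distinct artificial
   CONST_DECLs, e.g. ones created for identical literals in different
   translation units, compare equal for IPA-CP as long as their DECL_INITIALs
   are operand_equal_p, even though the ADDR_EXPRs themselves would not.  */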
/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}
/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}
/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, " param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, " ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, " ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, " virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, " AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, " AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that
   self-recursive calls from the node ITSELF are counted separately.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls ++;
      }
  return false;
}
/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     its calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
                     constants ()
  {}
};
/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}
/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}
/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}
bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}
/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}
/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}
/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
/* Meet operation, similar to ccp_lattice_meet: we xor the old and the new
   value and if they differ at some bit positions, we drop those bits to
   varying.  Return true if the mask changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */
bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
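/* A worked example of the meet above (an editorial sketch, not part of the
   original sources): meeting the current <m_value = 0x4, m_mask = 0x1> with
   an incoming <value = 0x6, mask = 0x0> yields

     m_mask  = (0x1 | 0x0) | (0x4 ^ 0x6) = 0x3
     m_value = 0x4 & ~0x3 = 0x4

   i.e. bits 0 and 1 become unknown while bit 2 remains known to be one.  */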
/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}
/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is NOP_EXPR, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand,
                              bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
        {
          adjusted_mask |= adjusted_value;
          adjusted_value &= ~adjusted_mask;
        }
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
                        drop_all_ones);
}
/* Dump the contents of the list to FILE.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
               av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
        fprintf (f, "(by_ref)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr.  */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}
/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
                        [] (const ipa_argagg_value &elt,
                            const ipa_argagg_value &val)
    {
      if (elt.index < val.index)
        return true;
      if (elt.index > val.index)
        return false;
      if (elt.unit_offset < val.unit_offset)
        return true;
      return false;
    });

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
                  || prev_index < av.index
                  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
          && av.unit_offset == unit_offset)
        slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}
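/* For illustration (an editorial note, not from the original sources): in a
   list sorted as (index 0, offset 0), (index 0, offset 4), (index 2, offset 0),
   get_elt (2, 0) finds the third item via the lower_bound search above, while
   get_elt (1, 0) returns NULL because no element with index 1 exists.  */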
/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
                        [] (const ipa_argagg_value &elt, unsigned idx)
    {
      return elt.index < idx;
    });
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
                                  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}
/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
             && (m_elts[j].index < other_index
                 || (m_elts[j].index == other_index
                     && m_elts[j].unit_offset < other_offset)))
        j++;

      if (j >= m_elts.size ()
          || m_elts[j].index != other_index
          || m_elts[j].unit_offset != other_offset
          || m_elts[j].by_ref != other.m_elts[i].by_ref
          || !m_elts[j].value
          || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
        return false;
    }
  return true;
}
/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
                                             unsigned dest_index,
                                             unsigned unit_delta,
                                             vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
        return;
      if (av->index == src_index
          && (av->unit_offset >= unit_delta)
          && av->value)
        {
          ipa_argagg_value new_av;
          gcc_checking_assert (av->value);
          new_av.value = av->value;
          new_av.unit_offset = av->unit_offset - unit_delta;
          new_av.index = dest_index;
          new_av.by_ref = av->by_ref;

          /* Quick check that the offsets we push are indeed increasing.  */
          gcc_assert (first
                      || new_av.unit_offset > prev_unit_offset);
          prev_unit_offset = new_av.unit_offset;
          first = false;

          res->safe_push (new_av);
        }
    }
}
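/* An editorial example of the adjustment above (not from the original
   sources): if this list holds items for SRC_INDEX 1 at unit offsets 0, 4
   and 8, calling push_adjusted_values (1, 0, 4, &res) pushes items describing
   parameter 0 at offsets 0 and 4; the item originally at offset 0 is skipped
   because subtracting UNIT_DELTA would make its offset negative.  */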
/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
                            unsigned unit_delta,
                            vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
        && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
        ipa_argagg_value iav;
        iav.value = aglat->values->value;
        iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
        iav.index = dest_index;
        iav.by_ref = plats->aggs_by_ref;

        gcc_assert (first
                    || iav.unit_offset > prev_unit_offset);
        prev_unit_offset = iav.unit_offset;
        first = false;

        pushed_sth = true;
        res->safe_push (iav);
      }
  return pushed_sth;
}
/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
                        const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
        continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
             && (other[j].index < this_index
                 || (other[j].index == this_index
                     && other[j].unit_offset < this_offset)))
        j++;

      if (j >= other.length ())
        {
          elts[i].value = NULL_TREE;
          continue;
        }

      if (other[j].index == this_index
          && other[j].unit_offset == this_offset
          && other[j].by_ref == elts[i].by_ref
          && other[j].value
          && values_equal_for_ipcp_p (other[j].value, elts[i].value))
        valid_entries++;
      else
        elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
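/* An editorial illustration of the intersection (not from the original
   sources): if ELTS holds values for (index 0, offset 0) and (index 1,
   offset 4) while OTHER only holds an equal value for (index 0, offset 0),
   the second entry of ELTS is cleared to NULL_TREE and 1 is returned.  */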
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}
/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}
/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, " Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           " The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
          || !ipa_get_type (info, i)
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.init (type);
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init (type);
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}
/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
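/* For illustration (an editorial note, not from the original sources): with
   OPCODE PLUS_EXPR, INPUT 3, OPERAND 4 and RES_TYPE int, the fold_binary call
   above produces the interprocedural invariant 7; with OPCODE NOP_EXPR the
   INPUT is returned unchanged without any folding.  */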
/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}
/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
           && zerop (input))
    return input;
  else
    return NULL_TREE;
}
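/* An editorial example (not from the original sources): if INPUT is &s and
   the ancestor jump function carries an offset of 32 bits, the result built
   above is effectively &MEM[&s + 4B], i.e. the address of the sub-object
   four bytes into s that corresponds to the ancestor's field.  */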
/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}
1914 /* Emulate the effects of a unary OPERATION and/or conversion from SRC_TYPE to
1915 DST_TYPE on the value range in SRC_VR and store the result in DST_VR. Return
1916 true if the result is a range that is neither VARYING nor UNDEFINED. */
1918 static bool
1919 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1920 const vrange &src_vr,
1921 enum tree_code operation,
1922 tree dst_type, tree src_type)
1924 if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
1925 return false;
1927 range_op_handler handler (operation);
1928 if (!handler)
1929 return false;
1931 Value_Range varying (dst_type);
1932 varying.set_varying (dst_type);
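/* For unary operations and conversions fold_range takes the source range
   and, as its second operand, a range of the destination type; a VARYING
   range of DST_TYPE conveys just the type information. */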
1934 return (handler.fold_range (dst_vr, dst_type, src_vr, varying)
1935 && !dst_vr.varying_p ()
1936 && !dst_vr.undefined_p ());
1939 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1940 first be extracted into a vrange. */
1942 static bool
1943 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1944 const ipa_vr &src_vr,
1945 enum tree_code operation,
1946 tree dst_type, tree src_type)
1948 Value_Range tmp;
1949 src_vr.get_vrange (tmp);
1950 return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1951 dst_type, src_type);
1954 /* Determine the range of JFUNC given that INFO describes the caller node or
1955 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1956 and PARM_TYPE is the type of the parameter. */
1958 void
1959 ipa_value_range_from_jfunc (vrange &vr,
1960 ipa_node_params *info, cgraph_edge *cs,
1961 ipa_jump_func *jfunc, tree parm_type)
1963 vr.set_undefined ();
1965 if (jfunc->m_vr)
1966 ipa_vr_operation_and_type_effects (vr,
1967 *jfunc->m_vr,
1968 NOP_EXPR, parm_type,
1969 jfunc->m_vr->type ());
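/* If the range stored directly in the jump function already pins the value
   down to a single constant, the pass-through refinement below cannot
   improve it. */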
1970 if (vr.singleton_p ())
1971 return;
1972 if (jfunc->type == IPA_JF_PASS_THROUGH)
1974 int idx;
1975 ipcp_transformation *sum
1976 = ipcp_get_transformation_summary (cs->caller->inlined_to
1977 ? cs->caller->inlined_to
1978 : cs->caller);
1979 if (!sum || !sum->m_vr)
1980 return;
1982 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1984 if (!(*sum->m_vr)[idx].known_p ())
1985 return;
1986 tree vr_type = ipa_get_type (info, idx);
1987 Value_Range srcvr;
1988 (*sum->m_vr)[idx].get_vrange (srcvr);
1990 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1992 if (TREE_CODE_CLASS (operation) == tcc_unary)
1994 Value_Range res (vr_type);
1996 if (ipa_vr_operation_and_type_effects (res,
1997 srcvr,
1998 operation, parm_type,
1999 vr_type))
2000 vr.intersect (res);
2002 else
2004 Value_Range op_res (vr_type);
2005 Value_Range res (vr_type);
2006 tree op = ipa_get_jf_pass_through_operand (jfunc);
2007 Value_Range op_vr (vr_type);
2008 range_op_handler handler (operation);
2010 ipa_range_set_and_normalize (op_vr, op);
2012 if (!handler
2013 || !op_res.supports_type_p (vr_type)
2014 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
2015 op_res.set_varying (vr_type);
2017 if (ipa_vr_operation_and_type_effects (res,
2018 op_res,
2019 NOP_EXPR, parm_type,
2020 vr_type))
2021 vr.intersect (res);
2026 /* Determine whether ITEM, a jump function for a part of an aggregate,
2027 evaluates to a single known constant value and if so, return it. Otherwise
2028 return NULL. NODE and INFO describe the caller node or the one it is
2029 inlined to, and its related info. */
2031 tree
2032 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
2033 const ipa_agg_jf_item *item)
2035 tree value = NULL_TREE;
2036 int src_idx;
2038 if (item->offset < 0
2039 || item->jftype == IPA_JF_UNKNOWN
2040 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
2041 return NULL_TREE;
2043 if (item->jftype == IPA_JF_CONST)
2044 return item->value.constant;
2046 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2047 || item->jftype == IPA_JF_LOAD_AGG);
2049 src_idx = item->value.pass_through.formal_id;
2051 if (info->ipcp_orig_node)
2053 if (item->jftype == IPA_JF_PASS_THROUGH)
2054 value = info->known_csts[src_idx];
2055 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2057 ipa_argagg_value_list avl (ts);
2058 value = avl.get_value (src_idx,
2059 item->value.load_agg.offset / BITS_PER_UNIT,
2060 item->value.load_agg.by_ref);
2063 else if (info->lattices)
2065 class ipcp_param_lattices *src_plats
2066 = ipa_get_parm_lattices (info, src_idx);
2068 if (item->jftype == IPA_JF_PASS_THROUGH)
2070 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2072 if (!lat->is_single_const ())
2073 return NULL_TREE;
2075 value = lat->values->value;
2077 else if (src_plats->aggs
2078 && !src_plats->aggs_bottom
2079 && !src_plats->aggs_contain_variable
2080 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2082 struct ipcp_agg_lattice *aglat;
2084 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2086 if (aglat->offset > item->value.load_agg.offset)
2087 break;
2089 if (aglat->offset == item->value.load_agg.offset)
2091 if (aglat->is_single_const ())
2092 value = aglat->values->value;
2093 break;
2099 if (!value)
2100 return NULL_TREE;
2102 if (item->jftype == IPA_JF_LOAD_AGG)
2104 tree load_type = item->value.load_agg.type;
2105 tree value_type = TREE_TYPE (value);
2107 /* Ensure value type is compatible with load type. */
2108 if (!useless_type_conversion_p (load_type, value_type))
2109 return NULL_TREE;
2112 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2113 value,
2114 item->value.pass_through.operand,
2115 item->type);
2118 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2119 caller is inlined to) NODE, which is described by INFO, and push the results
2120 to RES as describing values passed in parameter DST_INDEX. */
2122 void
2123 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2124 ipa_agg_jump_function *agg_jfunc,
2125 unsigned dst_index,
2126 vec<ipa_argagg_value> *res)
2128 unsigned prev_unit_offset = 0;
2129 bool first = true;
2131 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2133 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2134 if (!value)
2135 continue;
2137 ipa_argagg_value iav;
2138 iav.value = value;
2139 iav.unit_offset = item.offset / BITS_PER_UNIT;
2140 iav.index = dst_index;
2141 iav.by_ref = agg_jfunc->by_ref;
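/* Items of an aggregate jump function are expected to be sorted by offset,
   so the pushed list stays sorted too; the assert below verifies that. */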
2143 gcc_assert (first
2144 || iav.unit_offset > prev_unit_offset);
2145 prev_unit_offset = iav.unit_offset;
2146 first = false;
2148 res->safe_push (iav);
2152 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
2153 bottom, not containing a variable component and without any known value at
2154 the same time. */
2156 DEBUG_FUNCTION void
2157 ipcp_verify_propagated_values (void)
2159 struct cgraph_node *node;
2161 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2163 ipa_node_params *info = ipa_node_params_sum->get (node);
2164 if (!opt_for_fn (node->decl, flag_ipa_cp)
2165 || !opt_for_fn (node->decl, optimize))
2166 continue;
2167 int i, count = ipa_get_param_count (info);
2169 for (i = 0; i < count; i++)
2171 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2173 if (!lat->bottom
2174 && !lat->contains_variable
2175 && lat->values_count == 0)
2177 if (dump_file)
2179 symtab->dump (dump_file);
2180 fprintf (dump_file, "\nIPA lattices after constant "
2181 "propagation, before gcc_unreachable:\n");
2182 print_all_lattices (dump_file, true, false);
2185 gcc_unreachable ();
2191 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2193 static bool
2194 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2195 ipa_polymorphic_call_context y)
2197 return x.equal_to (y);
2201 /* Add a new value source to the value represented by THIS, marking that a
2202 value comes from edge CS and (if the underlying jump function is a
2203 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2204 parameter described by SRC_INDEX. OFFSET is negative if the source was the
2205 scalar value of the parameter itself, otherwise it is the offset within an aggregate. */
2207 template <typename valtype>
2208 void
2209 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2210 int src_idx, HOST_WIDE_INT offset)
2212 ipcp_value_source<valtype> *src;
2214 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2215 src->offset = offset;
2216 src->cs = cs;
2217 src->val = src_val;
2218 src->index = src_idx;
2220 src->next = sources;
2221 sources = src;
2224 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2225 CST and clear all other fields. */
2227 static ipcp_value<tree> *
2228 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2230 ipcp_value<tree> *val;
2232 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2233 val->value = cst;
2234 val->self_recursion_generated_level = same_lat_gen_level;
2235 return val;
2238 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2239 value to CTX and clear all other fields. */
2241 static ipcp_value<ipa_polymorphic_call_context> *
2242 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2243 unsigned same_lat_gen_level)
2245 ipcp_value<ipa_polymorphic_call_context> *val;
2247 val = new (ipcp_poly_ctx_values_pool.allocate ())
2248 ipcp_value<ipa_polymorphic_call_context>();
2249 val->value = ctx;
2250 val->self_recursion_generated_level = same_lat_gen_level;
2251 return val;
2254 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2255 SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
2256 meaning. OFFSET -1 means the source is scalar and not a part of an
2257 aggregate. If non-NULL, VAL_P records address of existing or newly added
2258 ipcp_value.
2260 If the value is generated for a self-recursive call as a result of an
2261 arithmetic pass-through jump-function acting on a value in the same lattice,
2262 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2263 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2265 template <typename valtype>
2266 bool
2267 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2268 ipcp_value<valtype> *src_val,
2269 int src_idx, HOST_WIDE_INT offset,
2270 ipcp_value<valtype> **val_p,
2271 unsigned same_lat_gen_level)
2273 ipcp_value<valtype> *val, *last_val = NULL;
2275 if (val_p)
2276 *val_p = NULL;
2278 if (bottom)
2279 return false;
2281 for (val = values; val; last_val = val, val = val->next)
2282 if (values_equal_for_ipcp_p (val->value, newval))
2284 if (val_p)
2285 *val_p = val;
2287 if (val->self_recursion_generated_level < same_lat_gen_level)
2288 val->self_recursion_generated_level = same_lat_gen_level;
2290 if (ipa_edge_within_scc (cs))
2292 ipcp_value_source<valtype> *s;
2293 for (s = val->sources; s; s = s->next)
2294 if (s->cs == cs && s->val == src_val)
2295 break;
2296 if (s)
2297 return false;
2300 val->add_source (cs, src_val, src_idx, offset);
2301 return false;
2304 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2305 param_ipa_cp_value_list_size))
2307 /* We can only free sources, not the values themselves, because sources
2308 of other values in this SCC might point to them. */
2309 for (val = values; val; val = val->next)
2311 while (val->sources)
2313 ipcp_value_source<valtype> *src = val->sources;
2314 val->sources = src->next;
2315 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2318 values = NULL;
2319 return set_to_bottom ();
2322 values_count++;
2323 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2324 val->add_source (cs, src_val, src_idx, offset);
2325 val->next = NULL;
2327 /* Add the new value to the end of the value list, which can reduce the
2328 number of propagation iterations for a recursive function. */
2329 if (last_val)
2330 last_val->next = val;
2331 else
2332 values = val;
2334 if (val_p)
2335 *val_p = val;
2337 return true;
2340 /* A helper function that returns the result of the operation specified by
2341 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
2342 type of the value of SRC_VAL. If the operation is binary, OPND2 is a
2343 constant value acting as its second operand. If non-NULL, RES_TYPE is the
2344 expected type of the result. */
2346 static tree
2347 get_val_across_arith_op (enum tree_code opcode,
2348 tree opnd1_type,
2349 tree opnd2,
2350 ipcp_value<tree> *src_val,
2351 tree res_type)
2353 tree opnd1 = src_val->value;
2355 /* Skip source values that are incompatible with the specified type. */
2356 if (opnd1_type
2357 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2358 return NULL_TREE;
2360 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2363 /* Propagate values through an arithmetic transformation described by a jump
2364 function associated with edge CS, taking values from SRC_LAT and putting
2365 them into DEST_LAT. OPND1_TYPE is the expected type of the values in SRC_LAT.
2366 OPND2 is a constant value if the transformation is a binary operation.
2367 SRC_OFFSET specifies the offset in an aggregate if SRC_LAT describes the
2368 lattice of a part of the aggregate. SRC_IDX is the index of the source
2369 parameter. RES_TYPE is the type of the resulting value. Return true if
2370 DEST_LAT changed. */
2372 static bool
2373 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2374 enum tree_code opcode,
2375 tree opnd1_type,
2376 tree opnd2,
2377 ipcp_lattice<tree> *src_lat,
2378 ipcp_lattice<tree> *dest_lat,
2379 HOST_WIDE_INT src_offset,
2380 int src_idx,
2381 tree res_type)
2383 ipcp_value<tree> *src_val;
2384 bool ret = false;
2386 /* Due to circular dependencies, propagating within an SCC through an
2387 arithmetic transformation would create an infinite number of values. But
2388 for a self-feeding recursive function we can allow a limited number of
2389 propagations, which can enable a simple kind of recursive function
2390 versioning. In all other scenarios we simply make the lattice bottom. */
2391 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2393 int i;
2395 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2396 param_ipa_cp_max_recursive_depth);
2397 if (src_lat != dest_lat || max_recursive_depth < 1)
2398 return dest_lat->set_contains_variable ();
2400 /* No benefit if recursive execution has low probability. */
2401 if (cs->sreal_frequency () * 100
2402 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2403 param_ipa_cp_min_recursive_probability))
2404 return dest_lat->set_contains_variable ();
2406 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2408 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2410 /* We do not use self-recursively generated values as propagation
2411 sources; this is absolutely conservative, but it avoids an explosion
2412 of the lattice's value space, especially when one recursive function
2413 calls another recursive one. */
2414 if (src_val->self_recursion_generated_p ())
2416 ipcp_value_source<tree> *s;
2418 /* If the lattice has already been propagated for the call site,
2419 no need to do that again. */
2420 for (s = src_val->sources; s; s = s->next)
2421 if (s->cs == cs)
2422 return dest_lat->set_contains_variable ();
2424 else
2425 val_seeds.safe_push (src_val);
2428 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2430 /* Recursively generate lattice values with a limited count. */
2431 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
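/* Starting from each seed, repeatedly apply the arithmetic jump function to
   the value generated in the previous step, producing the chain v, f(v),
   f(f(v)), ... up to the configured recursion depth. */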
2433 for (int j = 1; j < max_recursive_depth; j++)
2435 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2436 src_val, res_type);
2437 if (!cstval
2438 || !ipacp_value_safe_for_type (res_type, cstval))
2439 break;
2441 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2442 src_offset, &src_val, j);
2443 gcc_checking_assert (src_val);
2446 ret |= dest_lat->set_contains_variable ();
2448 else
2449 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2451 /* We do not use self-recursively generated values as propagation
2452 sources, otherwise the value space of a normal lattice could easily
2453 overflow. */
2454 if (src_val->self_recursion_generated_p ())
2456 ret |= dest_lat->set_contains_variable ();
2457 continue;
2460 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2461 src_val, res_type);
2462 if (cstval
2463 && ipacp_value_safe_for_type (res_type, cstval))
2464 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2465 src_offset);
2466 else
2467 ret |= dest_lat->set_contains_variable ();
2470 return ret;
2473 /* Propagate values through a pass-through jump function JFUNC associated with
2474 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2475 is the index of the source parameter. PARM_TYPE is the type of the
2476 parameter to which the result is passed. */
2478 static bool
2479 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2480 ipcp_lattice<tree> *src_lat,
2481 ipcp_lattice<tree> *dest_lat, int src_idx,
2482 tree parm_type)
2484 return propagate_vals_across_arith_jfunc (cs,
2485 ipa_get_jf_pass_through_operation (jfunc),
2486 NULL_TREE,
2487 ipa_get_jf_pass_through_operand (jfunc),
2488 src_lat, dest_lat, -1, src_idx, parm_type);
2491 /* Propagate values through an ancestor jump function JFUNC associated with
2492 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2493 is the index of the source parameter. */
2495 static bool
2496 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2497 struct ipa_jump_func *jfunc,
2498 ipcp_lattice<tree> *src_lat,
2499 ipcp_lattice<tree> *dest_lat, int src_idx,
2500 tree param_type)
2502 ipcp_value<tree> *src_val;
2503 bool ret = false;
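/* Within a strongly connected component an ancestor jump function could
   keep generating new values indefinitely, so punt on edges within an
   SCC. */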
2505 if (ipa_edge_within_scc (cs))
2506 return dest_lat->set_contains_variable ();
2508 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2510 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2512 if (t && ipacp_value_safe_for_type (param_type, t))
2513 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2514 else
2515 ret |= dest_lat->set_contains_variable ();
2518 return ret;
2521 /* Propagate scalar values across jump function JFUNC that is associated with
2522 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2523 parameter to which the result is passed. */
2525 static bool
2526 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2527 struct ipa_jump_func *jfunc,
2528 ipcp_lattice<tree> *dest_lat,
2529 tree param_type)
2531 if (dest_lat->bottom)
2532 return false;
2534 if (jfunc->type == IPA_JF_CONST)
2536 tree val = ipa_get_jf_constant (jfunc);
2537 if (ipacp_value_safe_for_type (param_type, val))
2538 return dest_lat->add_value (val, cs, NULL, 0);
2539 else
2540 return dest_lat->set_contains_variable ();
2542 else if (jfunc->type == IPA_JF_PASS_THROUGH
2543 || jfunc->type == IPA_JF_ANCESTOR)
2545 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2546 ipcp_lattice<tree> *src_lat;
2547 int src_idx;
2548 bool ret;
2550 if (jfunc->type == IPA_JF_PASS_THROUGH)
2551 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2552 else
2553 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2555 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2556 if (src_lat->bottom)
2557 return dest_lat->set_contains_variable ();
2559 /* If we would need to clone the caller and cannot, do not propagate. */
2560 if (!ipcp_versionable_function_p (cs->caller)
2561 && (src_lat->contains_variable
2562 || (src_lat->values_count > 1)))
2563 return dest_lat->set_contains_variable ();
2565 if (jfunc->type == IPA_JF_PASS_THROUGH)
2566 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2567 dest_lat, src_idx,
2568 param_type);
2569 else
2570 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2571 src_idx, param_type);
2573 if (src_lat->contains_variable)
2574 ret |= dest_lat->set_contains_variable ();
2576 return ret;
2579 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2580 use them for indirect inlining); we should propagate them too. */
2581 return dest_lat->set_contains_variable ();
2584 /* Propagate polymorphic contexts across jump function JFUNC that is associated
2585 with edge CS and describes argument IDX, and put the values into DEST_LAT. */
2587 static bool
2588 propagate_context_across_jump_function (cgraph_edge *cs,
2589 ipa_jump_func *jfunc, int idx,
2590 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2592 if (dest_lat->bottom)
2593 return false;
2594 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2595 bool ret = false;
2596 bool added_sth = false;
2597 bool type_preserved = true;
2599 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2600 = ipa_get_ith_polymorhic_call_context (args, idx);
2602 if (edge_ctx_ptr)
2603 edge_ctx = *edge_ctx_ptr;
2605 if (jfunc->type == IPA_JF_PASS_THROUGH
2606 || jfunc->type == IPA_JF_ANCESTOR)
2608 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2609 int src_idx;
2610 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2612 /* TODO: Once we figure out how to propagate speculations, it will
2613 probably be a good idea to switch to speculation if type_preserved is
2614 not set instead of punting. */
2615 if (jfunc->type == IPA_JF_PASS_THROUGH)
2617 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2618 goto prop_fail;
2619 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2620 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2622 else
2624 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2625 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2628 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2629 /* If we would need to clone the caller and cannot, do not propagate. */
2630 if (!ipcp_versionable_function_p (cs->caller)
2631 && (src_lat->contains_variable
2632 || (src_lat->values_count > 1)))
2633 goto prop_fail;
2635 ipcp_value<ipa_polymorphic_call_context> *src_val;
2636 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2638 ipa_polymorphic_call_context cur = src_val->value;
2640 if (!type_preserved)
2641 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2642 if (jfunc->type == IPA_JF_ANCESTOR)
2643 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2644 /* TODO: In cases we know how the context is going to be used,
2645 we can improve the result by passing proper OTR_TYPE. */
2646 cur.combine_with (edge_ctx);
2647 if (!cur.useless_p ())
2649 if (src_lat->contains_variable
2650 && !edge_ctx.equal_to (cur))
2651 ret |= dest_lat->set_contains_variable ();
2652 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2653 added_sth = true;
2658 prop_fail:
2659 if (!added_sth)
2661 if (!edge_ctx.useless_p ())
2662 ret |= dest_lat->add_value (edge_ctx, cs);
2663 else
2664 ret |= dest_lat->set_contains_variable ();
2667 return ret;
2670 /* Propagate bits across JFUNC that is associated with
2671 edge CS and update DEST_LATTICE accordingly. */
2673 bool
2674 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2675 ipa_jump_func *jfunc,
2676 ipcp_bits_lattice *dest_lattice)
2678 if (dest_lattice->bottom_p ())
2679 return false;
2681 enum availability availability;
2682 cgraph_node *callee = cs->callee->function_symbol (&availability);
2683 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2684 tree parm_type = ipa_get_type (callee_info, idx);
2686 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2687 transform for these cases. Similarly, we can have bad type mismatches
2688 with LTO, avoid doing anything with those too. */
2689 if (!parm_type
2690 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2692 if (dump_file && (dump_flags & TDF_DETAILS))
2693 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2694 "param %i of %s is NULL or unsuitable for bits propagation\n",
2695 idx, cs->callee->dump_name ());
2697 return dest_lattice->set_to_bottom ();
2700 unsigned precision = TYPE_PRECISION (parm_type);
2701 signop sgn = TYPE_SIGN (parm_type);
2703 if (jfunc->type == IPA_JF_PASS_THROUGH
2704 || jfunc->type == IPA_JF_ANCESTOR)
2706 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2707 tree operand = NULL_TREE;
2708 enum tree_code code;
2709 unsigned src_idx;
2710 bool keep_null = false;
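/* A pass-through jump function reuses its recorded operation; an ancestor
   jump function adds a constant byte offset to a pointer and is therefore
   modeled as a POINTER_PLUS_EXPR below. */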
2712 if (jfunc->type == IPA_JF_PASS_THROUGH)
2714 code = ipa_get_jf_pass_through_operation (jfunc);
2715 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2716 if (code != NOP_EXPR)
2717 operand = ipa_get_jf_pass_through_operand (jfunc);
2719 else
2721 code = POINTER_PLUS_EXPR;
2722 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2723 unsigned HOST_WIDE_INT offset
2724 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2725 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2726 operand = build_int_cstu (size_type_node, offset);
2729 class ipcp_param_lattices *src_lats
2730 = ipa_get_parm_lattices (caller_info, src_idx);
2732 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2733 For example, consider:
2734 int f(int x)
2735 {
2736 g (x & 0xff);
2737 }
2738 Assume the lattice for x is bottom; however, we can still propagate
2739 the result of x & 0xff, whose bits outside the low byte are known to be
2740 zero. This gets computed during the ccp1 pass and we store it in the jump function during the analysis stage. */
2742 if (!src_lats->bits_lattice.bottom_p ())
2744 bool drop_all_ones
2745 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2747 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2748 sgn, code, operand, drop_all_ones);
2752 Value_Range vr (parm_type);
2753 if (jfunc->m_vr)
2755 jfunc->m_vr->get_vrange (vr);
2756 if (!vr.undefined_p () && !vr.varying_p ())
2758 irange &r = as_a <irange> (vr);
2759 irange_bitmask bm = r.get_bitmask ();
2760 widest_int mask
2761 = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2762 widest_int value
2763 = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2764 return dest_lattice->meet_with (value, mask, precision);
2767 return dest_lattice->set_to_bottom ();
2770 /* Propagate value range across jump function JFUNC that is associated with
2771 edge CS whose callee parameter has type PARAM_TYPE, and update DEST_PLATS
2772 accordingly. */
2774 static bool
2775 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2776 class ipcp_param_lattices *dest_plats,
2777 tree param_type)
2779 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2781 if (dest_lat->bottom_p ())
2782 return false;
2784 if (!param_type
2785 || (!INTEGRAL_TYPE_P (param_type)
2786 && !POINTER_TYPE_P (param_type)))
2787 return dest_lat->set_to_bottom ();
2789 if (jfunc->type == IPA_JF_PASS_THROUGH)
2791 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2792 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2793 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2794 class ipcp_param_lattices *src_lats
2795 = ipa_get_parm_lattices (caller_info, src_idx);
2796 tree operand_type = ipa_get_type (caller_info, src_idx);
2798 if (src_lats->m_value_range.bottom_p ())
2799 return dest_lat->set_to_bottom ();
2801 Value_Range vr (operand_type);
2802 if (TREE_CODE_CLASS (operation) == tcc_unary)
2803 ipa_vr_operation_and_type_effects (vr,
2804 src_lats->m_value_range.m_vr,
2805 operation, param_type,
2806 operand_type);
2807 /* A crude way to prevent an unbounded number of value range updates
2808 in SCC components. We should allow a limited number of updates within
2809 an SCC, too. */
2810 else if (!ipa_edge_within_scc (cs))
2812 tree op = ipa_get_jf_pass_through_operand (jfunc);
2813 Value_Range op_vr (TREE_TYPE (op));
2814 Value_Range op_res (operand_type);
2815 range_op_handler handler (operation);
2817 ipa_range_set_and_normalize (op_vr, op);
2819 if (!handler
2820 || !op_res.supports_type_p (operand_type)
2821 || !handler.fold_range (op_res, operand_type,
2822 src_lats->m_value_range.m_vr, op_vr))
2823 op_res.set_varying (operand_type);
2825 ipa_vr_operation_and_type_effects (vr,
2826 op_res,
2827 NOP_EXPR, param_type,
2828 operand_type);
2830 if (!vr.undefined_p () && !vr.varying_p ())
2832 if (jfunc->m_vr)
2834 Value_Range jvr (param_type);
2835 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2836 NOP_EXPR,
2837 param_type,
2838 jfunc->m_vr->type ()))
2839 vr.intersect (jvr);
2841 return dest_lat->meet_with (vr);
2844 else if (jfunc->type == IPA_JF_CONST)
2846 tree val = ipa_get_jf_constant (jfunc);
2847 if (TREE_CODE (val) == INTEGER_CST)
2849 val = fold_convert (param_type, val);
2850 if (TREE_OVERFLOW_P (val))
2851 val = drop_tree_overflow (val);
2853 Value_Range tmpvr (val, val);
2854 return dest_lat->meet_with (tmpvr);
2858 Value_Range vr (param_type);
2859 if (jfunc->m_vr
2860 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2861 param_type,
2862 jfunc->m_vr->type ()))
2863 return dest_lat->meet_with (vr);
2864 else
2865 return dest_lat->set_to_bottom ();
2868 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2869 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2870 other cases, return false). If there are no aggregate items, set
2871 aggs_by_ref to NEW_AGGS_BY_REF. */
2873 static bool
2874 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2875 bool new_aggs_by_ref)
2877 if (dest_plats->aggs)
2879 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2881 set_agg_lats_to_bottom (dest_plats);
2882 return true;
2885 else
2886 dest_plats->aggs_by_ref = new_aggs_by_ref;
2887 return false;
2890 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2891 already existing lattice for the given OFFSET and SIZE, marking all skipped
2892 lattices as containing variable and checking for overlaps. If there is no
2893 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2894 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2895 unless there are too many already. If there are too many, return false. If
2896 there are overlaps, turn the whole DEST_PLATS to bottom and return false. If any
2897 skipped lattices were newly marked as containing variable, set *CHANGE to
2898 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2900 static bool
2901 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2902 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2903 struct ipcp_agg_lattice ***aglat,
2904 bool pre_existing, bool *change, int max_agg_items)
2906 gcc_checking_assert (offset >= 0);
2908 while (**aglat && (**aglat)->offset < offset)
2910 if ((**aglat)->offset + (**aglat)->size > offset)
2912 set_agg_lats_to_bottom (dest_plats);
2913 return false;
2915 *change |= (**aglat)->set_contains_variable ();
2916 *aglat = &(**aglat)->next;
2919 if (**aglat && (**aglat)->offset == offset)
2921 if ((**aglat)->size != val_size)
2923 set_agg_lats_to_bottom (dest_plats);
2924 return false;
2926 gcc_assert (!(**aglat)->next
2927 || (**aglat)->next->offset >= offset + val_size);
2928 return true;
2930 else
2932 struct ipcp_agg_lattice *new_al;
2934 if (**aglat && (**aglat)->offset < offset + val_size)
2936 set_agg_lats_to_bottom (dest_plats);
2937 return false;
2939 if (dest_plats->aggs_count == max_agg_items)
2940 return false;
2941 dest_plats->aggs_count++;
2942 new_al = ipcp_agg_lattice_pool.allocate ();
2943 memset (new_al, 0, sizeof (*new_al));
2945 new_al->offset = offset;
2946 new_al->size = val_size;
2947 new_al->contains_variable = pre_existing;
2949 new_al->next = **aglat;
2950 **aglat = new_al;
2951 return true;
2955 /* Set AGLAT and all other aggregate lattices reachable through next pointers
2956 as containing an unknown value. */
2958 static bool
2959 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2961 bool ret = false;
2962 while (aglat)
2964 ret |= aglat->set_contains_variable ();
2965 aglat = aglat->next;
2967 return ret;
2970 /* Merge existing aggregate lattices in SRC_PLATS into DEST_PLATS, subtracting
2971 OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
2972 parameter used for lattice value sources. Return true if DEST_PLATS changed
2973 in any way. */
2975 static bool
2976 merge_aggregate_lattices (struct cgraph_edge *cs,
2977 class ipcp_param_lattices *dest_plats,
2978 class ipcp_param_lattices *src_plats,
2979 int src_idx, HOST_WIDE_INT offset_delta)
2981 bool pre_existing = dest_plats->aggs != NULL;
2982 struct ipcp_agg_lattice **dst_aglat;
2983 bool ret = false;
2985 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2986 return true;
2987 if (src_plats->aggs_bottom)
2988 return set_agg_lats_contain_variable (dest_plats);
2989 if (src_plats->aggs_contain_variable)
2990 ret |= set_agg_lats_contain_variable (dest_plats);
2991 dst_aglat = &dest_plats->aggs;
2993 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2994 param_ipa_max_agg_items);
2995 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2996 src_aglat;
2997 src_aglat = src_aglat->next)
2999 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
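/* Parts of the source aggregate that would fall before the start of the
   destination (possible when merging across an ancestor jump function with
   a positive offset) have no counterpart and are skipped. */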
3001 if (new_offset < 0)
3002 continue;
3003 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
3004 &dst_aglat, pre_existing, &ret, max_agg_items))
3006 struct ipcp_agg_lattice *new_al = *dst_aglat;
3008 dst_aglat = &(*dst_aglat)->next;
3009 if (src_aglat->bottom)
3011 ret |= new_al->set_contains_variable ();
3012 continue;
3014 if (src_aglat->contains_variable)
3015 ret |= new_al->set_contains_variable ();
3016 for (ipcp_value<tree> *val = src_aglat->values;
3017 val;
3018 val = val->next)
3019 ret |= new_al->add_value (val->value, cs, val, src_idx,
3020 src_aglat->offset);
3022 else if (dest_plats->aggs_bottom)
3023 return true;
3025 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
3026 return ret;
3029 /* Determine whether there is anything to propagate from SRC_PLATS through a
3030 pass-through JFUNC and if so, whether it conforms to the
3031 rules about propagating values passed by reference. */
3033 static bool
3034 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
3035 struct ipa_jump_func *jfunc)
3037 return src_plats->aggs
3038 && (!src_plats->aggs_by_ref
3039 || ipa_get_jf_pass_through_agg_preserved (jfunc));
3042 /* Propagate values through ITEM, a jump function for a part of an aggregate,
3043 into the corresponding aggregate lattice AGLAT. CS is the call graph edge
3044 associated with the jump function. Return true if AGLAT changed in any
3045 way. */
3047 static bool
3048 propagate_aggregate_lattice (struct cgraph_edge *cs,
3049 struct ipa_agg_jf_item *item,
3050 struct ipcp_agg_lattice *aglat)
3052 class ipa_node_params *caller_info;
3053 class ipcp_param_lattices *src_plats;
3054 struct ipcp_lattice<tree> *src_lat;
3055 HOST_WIDE_INT src_offset;
3056 int src_idx;
3057 tree load_type;
3058 bool ret;
3060 if (item->jftype == IPA_JF_CONST)
3062 tree value = item->value.constant;
3064 gcc_checking_assert (is_gimple_ip_invariant (value));
3065 return aglat->add_value (value, cs, NULL, 0);
3068 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
3069 || item->jftype == IPA_JF_LOAD_AGG);
3071 caller_info = ipa_node_params_sum->get (cs->caller);
3072 src_idx = item->value.pass_through.formal_id;
3073 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3075 if (item->jftype == IPA_JF_PASS_THROUGH)
3077 load_type = NULL_TREE;
3078 src_lat = &src_plats->itself;
3079 src_offset = -1;
3081 else
3083 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3084 struct ipcp_agg_lattice *src_aglat;
3086 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3087 if (src_aglat->offset >= load_offset)
3088 break;
3090 load_type = item->value.load_agg.type;
3091 if (!src_aglat
3092 || src_aglat->offset > load_offset
3093 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3094 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3095 return aglat->set_contains_variable ();
3097 src_lat = src_aglat;
3098 src_offset = load_offset;
3101 if (src_lat->bottom
3102 || (!ipcp_versionable_function_p (cs->caller)
3103 && !src_lat->is_single_const ()))
3104 return aglat->set_contains_variable ();
3106 ret = propagate_vals_across_arith_jfunc (cs,
3107 item->value.pass_through.operation,
3108 load_type,
3109 item->value.pass_through.operand,
3110 src_lat, aglat,
3111 src_offset,
3112 src_idx,
3113 item->type);
3115 if (src_lat->contains_variable)
3116 ret |= aglat->set_contains_variable ();
3118 return ret;
3121 /* Propagate aggregate values across jump function JFUNC that is associated
3122 with edge CS and put the values into DEST_PLATS. */
3124 static bool
3125 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3126 struct ipa_jump_func *jfunc,
3127 class ipcp_param_lattices *dest_plats)
3129 bool ret = false;
3131 if (dest_plats->aggs_bottom)
3132 return false;
3134 if (jfunc->type == IPA_JF_PASS_THROUGH
3135 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3137 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3138 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3139 class ipcp_param_lattices *src_plats;
3141 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3142 if (agg_pass_through_permissible_p (src_plats, jfunc))
3144 /* Currently we do not produce clobber aggregate jump
3145 functions; replace this with merging when we do. */
3146 gcc_assert (!jfunc->agg.items);
3147 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3148 src_idx, 0);
3149 return ret;
3152 else if (jfunc->type == IPA_JF_ANCESTOR
3153 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3155 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3156 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3157 class ipcp_param_lattices *src_plats;
3159 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3160 if (src_plats->aggs && src_plats->aggs_by_ref)
3162 /* Currently we do not produce clobber aggregate jump
3163 functions; replace this with merging when we do. */
3164 gcc_assert (!jfunc->agg.items);
3165 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3166 ipa_get_jf_ancestor_offset (jfunc));
3168 else if (!src_plats->aggs_by_ref)
3169 ret |= set_agg_lats_to_bottom (dest_plats);
3170 else
3171 ret |= set_agg_lats_contain_variable (dest_plats);
3172 return ret;
3175 if (jfunc->agg.items)
3177 bool pre_existing = dest_plats->aggs != NULL;
3178 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3179 struct ipa_agg_jf_item *item;
3180 int i;
3182 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3183 return true;
3185 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3186 param_ipa_max_agg_items);
3187 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3189 HOST_WIDE_INT val_size;
3191 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3192 continue;
3193 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3195 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3196 &aglat, pre_existing, &ret, max_agg_items))
3198 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3199 aglat = &(*aglat)->next;
3201 else if (dest_plats->aggs_bottom)
3202 return true;
3205 ret |= set_chain_of_aglats_contains_variable (*aglat);
3207 else
3208 ret |= set_agg_lats_contain_variable (dest_plats);
3210 return ret;
3213 /* Return true if on the way from CS->caller to the final (non-alias and
3214 non-thunk) destination, the call passes through a thunk. */
3216 static bool
3217 call_passes_through_thunk (cgraph_edge *cs)
3219 cgraph_node *alias_or_thunk = cs->callee;
3220 while (alias_or_thunk->alias)
3221 alias_or_thunk = alias_or_thunk->get_alias_target ();
3222 return alias_or_thunk->thunk;
3225 /* Propagate constants from the caller to the callee of CS. Return true if
3226 any propagation lattice of the callee changed. */
3228 static bool
3229 propagate_constants_across_call (struct cgraph_edge *cs)
3231 class ipa_node_params *callee_info;
3232 enum availability availability;
3233 cgraph_node *callee;
3234 class ipa_edge_args *args;
3235 bool ret = false;
3236 int i, args_count, parms_count;
3238 callee = cs->callee->function_symbol (&availability);
3239 if (!callee->definition)
3240 return false;
3241 gcc_checking_assert (callee->has_gimple_body_p ());
3242 callee_info = ipa_node_params_sum->get (callee);
3243 if (!callee_info)
3244 return false;
3246 args = ipa_edge_args_sum->get (cs);
3247 parms_count = ipa_get_param_count (callee_info);
3248 if (parms_count == 0)
3249 return false;
3250 if (!args
3251 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3252 || !opt_for_fn (cs->caller->decl, optimize))
3254 for (i = 0; i < parms_count; i++)
3255 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3256 i));
3257 return ret;
3259 args_count = ipa_get_cs_argument_count (args);
3261 /* If this call goes through a thunk we must not propagate to the first (0th)
3262 parameter. However, we might need to uncover a thunk from below a series
3263 of aliases first. */
3264 if (call_passes_through_thunk (cs))
3266 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3267 0));
3268 i = 1;
3270 else
3271 i = 0;
3273 for (; (i < args_count) && (i < parms_count); i++)
3275 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3276 class ipcp_param_lattices *dest_plats;
3277 tree param_type = ipa_get_type (callee_info, i);
3279 dest_plats = ipa_get_parm_lattices (callee_info, i);
3280 if (availability == AVAIL_INTERPOSABLE)
3281 ret |= set_all_contains_variable (dest_plats);
3282 else
3284 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3285 &dest_plats->itself,
3286 param_type);
3287 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3288 &dest_plats->ctxlat);
3289 ret
3290 |= propagate_bits_across_jump_function (cs, i, jump_func,
3291 &dest_plats->bits_lattice);
3292 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3293 dest_plats);
3294 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3295 ret |= propagate_vr_across_jump_function (cs, jump_func,
3296 dest_plats, param_type);
3297 else
3298 ret |= dest_plats->m_value_range.set_to_bottom ();
3301 for (; i < parms_count; i++)
3302 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3304 return ret;
3307 /* If an indirect edge IE can be turned into a direct one based on constants
3308 in KNOWN_CSTS, KNOWN_CONTEXTS and known aggregate values in AVS, return
3309 the destination. Store into *SPECULATIVE whether the discovered target is
3310 only a speculative guess. */
3312 static tree
3313 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3314 const vec<tree> &known_csts,
3315 const vec<ipa_polymorphic_call_context> &known_contexts,
3316 const ipa_argagg_value_list &avs,
3317 bool *speculative)
3319 int param_index = ie->indirect_info->param_index;
3320 HOST_WIDE_INT anc_offset;
3321 tree t = NULL;
3322 tree target = NULL;
3324 *speculative = false;
3326 if (param_index == -1)
3327 return NULL_TREE;
3329 if (!ie->indirect_info->polymorphic)
3331 tree t = NULL;
3333 if (ie->indirect_info->agg_contents)
3335 t = NULL;
3336 if ((unsigned) param_index < known_csts.length ()
3337 && known_csts[param_index])
3338 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3339 ie->indirect_info->offset,
3340 ie->indirect_info->by_ref);
3342 if (!t && ie->indirect_info->guaranteed_unmodified)
3343 t = avs.get_value (param_index,
3344 ie->indirect_info->offset / BITS_PER_UNIT,
3345 ie->indirect_info->by_ref);
3347 else if ((unsigned) param_index < known_csts.length ())
3348 t = known_csts[param_index];
3350 if (t
3351 && TREE_CODE (t) == ADDR_EXPR
3352 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3353 return TREE_OPERAND (t, 0);
3354 else
3355 return NULL_TREE;
3358 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3359 return NULL_TREE;
3361 gcc_assert (!ie->indirect_info->agg_contents);
3362 gcc_assert (!ie->indirect_info->by_ref);
3363 anc_offset = ie->indirect_info->offset;
3365 t = NULL;
3367 if ((unsigned) param_index < known_csts.length ()
3368 && known_csts[param_index])
3369 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3370 ie->indirect_info->offset, true);
3372 /* Try to work out the value of the virtual table pointer from replacements
3373 or known aggregate values. */
3374 if (!t)
3375 t = avs.get_value (param_index,
3376 ie->indirect_info->offset / BITS_PER_UNIT,
3377 true);
3379 /* If we found the virtual table pointer, lookup the target. */
3380 if (t)
3382 tree vtable;
3383 unsigned HOST_WIDE_INT offset;
3384 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3386 bool can_refer;
3387 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3388 vtable, offset, &can_refer);
3389 if (can_refer)
3391 if (!target
3392 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3393 || !possible_polymorphic_call_target_p
3394 (ie, cgraph_node::get (target)))
3396 /* Do not speculate builtin_unreachable; it is stupid! */
3397 if (ie->indirect_info->vptr_changed)
3398 return NULL;
3399 target = ipa_impossible_devirt_target (ie, target);
3401 *speculative = ie->indirect_info->vptr_changed;
3402 if (!*speculative)
3403 return target;
3408 /* Do we know the constant value of the pointer? */
3409 if (!t && (unsigned) param_index < known_csts.length ())
3410 t = known_csts[param_index];
3412 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3414 ipa_polymorphic_call_context context;
3415 if (known_contexts.length () > (unsigned int) param_index)
3417 context = known_contexts[param_index];
3418 context.offset_by (anc_offset);
3419 if (ie->indirect_info->vptr_changed)
3420 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3421 ie->indirect_info->otr_type);
3422 if (t)
3424 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3425 (t, ie->indirect_info->otr_type, anc_offset);
3426 if (!ctx2.useless_p ())
3427 context.combine_with (ctx2, ie->indirect_info->otr_type);
3430 else if (t)
3432 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3433 anc_offset);
3434 if (ie->indirect_info->vptr_changed)
3435 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3436 ie->indirect_info->otr_type);
3438 else
3439 return NULL_TREE;
3441 vec <cgraph_node *>targets;
3442 bool final;
3444 targets = possible_polymorphic_call_targets
3445 (ie->indirect_info->otr_type,
3446 ie->indirect_info->otr_token,
3447 context, &final);
3448 if (!final || targets.length () > 1)
3450 struct cgraph_node *node;
3451 if (*speculative)
3452 return target;
3453 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3454 || ie->speculative || !ie->maybe_hot_p ())
3455 return NULL;
3456 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3457 ie->indirect_info->otr_token,
3458 context);
3459 if (node)
3461 *speculative = true;
3462 target = node->decl;
3464 else
3465 return NULL;
3467 else
3469 *speculative = false;
3470 if (targets.length () == 1)
3471 target = targets[0]->decl;
3472 else
3473 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3476 if (target && !possible_polymorphic_call_target_p (ie,
3477 cgraph_node::get (target)))
3479 if (*speculative)
3480 return NULL;
3481 target = ipa_impossible_devirt_target (ie, target);
3484 return target;
3487 /* If an indirect edge IE can be turned into a direct one based on data in
3488 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3489 whether the discovered target is only a speculative guess. */
3491 tree
3492 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3493 ipa_call_arg_values *avals,
3494 bool *speculative)
3496 ipa_argagg_value_list avl (avals);
3497 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3498 avals->m_known_contexts,
3499 avl, speculative);
3502 /* Calculate devirtualization time bonus for NODE, assuming we know information
3503 about arguments stored in AVALS. */
3505 static int
3506 devirtualization_time_bonus (struct cgraph_node *node,
3507 ipa_auto_call_arg_values *avals)
3509 struct cgraph_edge *ie;
3510 int res = 0;
3512 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3514 struct cgraph_node *callee;
3515 class ipa_fn_summary *isummary;
3516 enum availability avail;
3517 tree target;
3518 bool speculative;
3520 ipa_argagg_value_list avl (avals);
3521 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3522 avals->m_known_contexts,
3523 avl, &speculative);
3524 if (!target)
3525 continue;
3527 /* Only bare minimum benefit for clearly un-inlineable targets. */
3528 res += 1;
3529 callee = cgraph_node::get (target);
3530 if (!callee || !callee->definition)
3531 continue;
3532 callee = callee->function_symbol (&avail);
3533 if (avail < AVAIL_AVAILABLE)
3534 continue;
3535 isummary = ipa_fn_summaries->get (callee);
3536 if (!isummary || !isummary->inlinable)
3537 continue;
3539 int size = ipa_size_summaries->get (callee)->size;
3540 /* FIXME: The values below need re-considering and perhaps also
3541 integrating into the cost metrics, at least in some very basic way. */
3542 int max_inline_insns_auto
3543 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3544 if (size <= max_inline_insns_auto / 4)
3545 res += 31 / ((int)speculative + 1);
3546 else if (size <= max_inline_insns_auto / 2)
3547 res += 15 / ((int)speculative + 1);
3548 else if (size <= max_inline_insns_auto
3549 || DECL_DECLARED_INLINE_P (callee->decl))
3550 res += 7 / ((int)speculative + 1);
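/* The bonuses above are halved for speculative targets, where the
   devirtualized call must keep an indirect fallback and the guessed target
   may turn out to be wrong. */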
3553 return res;
3556 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3558 static int
3559 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3561 int result = 0;
3562 ipa_hints hints = estimates.hints;
3563 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3564 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3566 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3568 if (hints & INLINE_HINT_loop_iterations)
3569 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3571 if (hints & INLINE_HINT_loop_stride)
3572 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3574 return result;
3577 /* If there is a reason to penalize the function described by INFO in the
3578 cloning goodness evaluation, do so. */
3580 static inline sreal
3581 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3582 sreal evaluation)
3584 if (info->node_within_scc && !info->node_is_self_scc)
3585 evaluation = (evaluation
3586 * (100 - opt_for_fn (node->decl,
3587 param_ipa_cp_recursion_penalty))) / 100;
3589 if (info->node_calling_single_call)
3590 evaluation = (evaluation
3591 * (100 - opt_for_fn (node->decl,
3592 param_ipa_cp_single_call_penalty)))
3593 / 100;
3595 return evaluation;
3598 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3599 and SIZE_COST and with the sum of frequencies of incoming edges to the
3600 potential new clone in FREQ_SUM. */
3602 static bool
3603 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3604 sreal freq_sum, profile_count count_sum,
3605 int size_cost)
3607 if (time_benefit == 0
3608 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3609 || node->optimize_for_size_p ())
3610 return false;
3612 gcc_assert (size_cost > 0);
3614 ipa_node_params *info = ipa_node_params_sum->get (node);
3615 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3616 if (count_sum.nonzero_p ())
3618 gcc_assert (base_count.nonzero_p ());
3619 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3620 sreal evaluation = (time_benefit * factor) / size_cost;
3621 evaluation = incorporate_penalties (node, info, evaluation);
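/* Scale up so that meaningful fractions of the benefit/cost ratio survive
   the conversion to an integer when compared with the threshold. */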
3622 evaluation *= 1000;
3624 if (dump_file && (dump_flags & TDF_DETAILS))
3626 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3627 "size: %i, count_sum: ", time_benefit.to_double (),
3628 size_cost);
3629 count_sum.dump (dump_file);
3630 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3631 info->node_within_scc
3632 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3633 info->node_calling_single_call ? ", single_call" : "",
3634 evaluation.to_double (), eval_threshold);
3637 return evaluation.to_int () >= eval_threshold;
3639 else
3641 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3642 evaluation = incorporate_penalties (node, info, evaluation);
3643 evaluation *= 1000;
3645 if (dump_file && (dump_flags & TDF_DETAILS))
3646 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3647 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3648 "threshold: %i\n",
3649 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3650 info->node_within_scc
3651 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3652 info->node_calling_single_call ? ", single_call" : "",
3653 evaluation.to_double (), eval_threshold);
3655 return evaluation.to_int () >= eval_threshold;
3659 /* Grow vectors in AVALS and fill them with information about values of
3660 parameters that are known to be independent of the context. Only calculate
3661 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3662 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3663 parameters will be stored in it.
3665 TODO: Also grow context independent value range vectors. */
3667 static bool
3668 gather_context_independent_values (class ipa_node_params *info,
3669 ipa_auto_call_arg_values *avals,
3670 bool calculate_aggs,
3671 int *removable_params_cost)
3673 int i, count = ipa_get_param_count (info);
3674 bool ret = false;
3676 avals->m_known_vals.safe_grow_cleared (count, true);
3677 avals->m_known_contexts.safe_grow_cleared (count, true);
3679 if (removable_params_cost)
3680 *removable_params_cost = 0;
3682 for (i = 0; i < count; i++)
3684 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3685 ipcp_lattice<tree> *lat = &plats->itself;
3687 if (lat->is_single_const ())
3689 ipcp_value<tree> *val = lat->values;
3690 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3691 avals->m_known_vals[i] = val->value;
3692 if (removable_params_cost)
3693 *removable_params_cost
3694 += estimate_move_cost (TREE_TYPE (val->value), false);
3695 ret = true;
3697 else if (removable_params_cost
3698 && !ipa_is_param_used (info, i))
3699 *removable_params_cost
3700 += ipa_get_param_move_cost (info, i);
3702 if (!ipa_is_param_used (info, i))
3703 continue;
3705 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3706 /* Do not account a known context as a reason for cloning. We can still
3707 see whether it permits devirtualization. */
3708 if (ctxlat->is_single_const ())
3709 avals->m_known_contexts[i] = ctxlat->values->value;
3711 if (calculate_aggs)
3712 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3715 return ret;
3718 /* Perform time and size measurement of NODE with the context given in AVALS,
3719 calculate the benefit compared to the node without specialization and store
3720 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3721 context-independent or unused removable parameters and EST_MOVE_COST, the
3722 estimated movement cost of the considered parameter. */
3724 static void
3725 perform_estimation_of_a_value (cgraph_node *node,
3726 ipa_auto_call_arg_values *avals,
3727 int removable_params_cost, int est_move_cost,
3728 ipcp_value_base *val)
3730 sreal time_benefit;
3731 ipa_call_estimates estimates;
3733 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3735 /* Extern inline functions have no cloning local time benefits because they
3736 will be inlined anyway. The only reason to clone them is if it enables
3737 optimization in any of the functions they call. */
3738 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3739 time_benefit = 0;
3740 else
3741 time_benefit = (estimates.nonspecialized_time - estimates.time)
3742 + (devirtualization_time_bonus (node, avals)
3743 + hint_time_bonus (node, estimates)
3744 + removable_params_cost + est_move_cost);
3746 int size = estimates.size;
3747 gcc_checking_assert (size >= 0);
3748 /* The inliner-heuristics based estimates may think that in certain
3749 contexts some functions do not have any size at all, but we want
3750 all specializations to have at least a tiny cost, not least to
3751 avoid dividing by zero. */
3752 if (size == 0)
3753 size = 1;
3755 val->local_time_benefit = time_benefit;
3756 val->local_size_cost = size;
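/* An illustrative computation with made-up numbers: if specialization is
   estimated to save 40 time units (nonspecialized_time - time),
   devirtualization_time_bonus returns 10 and the hint bonus and both
   parameter costs are zero, local_time_benefit is 50.  If the size
   estimate came out as zero, local_size_cost is still clamped to 1.  */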
3759 /* Get the overall limit on unit growth based on parameters extracted from
3760 NODE. It does not really make sense to mix functions with different overall
3761 growth limits, but it is possible, and if it happens, we do not want to
3762 select one limit at random. */
3764 static long
3765 get_max_overall_size (cgraph_node *node)
3767 long max_new_size = orig_overall_size;
3768 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3769 if (max_new_size < large_unit)
3770 max_new_size = large_unit;
3771 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3772 max_new_size += max_new_size * unit_growth / 100 + 1;
3773 return max_new_size;
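/* For example, with made-up parameter values: if orig_overall_size is
   10000, param_ipa_cp_large_unit_insns is 16000 and
   param_ipa_cp_unit_growth is 10, the base is first raised to 16000 and
   the returned limit is 16000 + 16000 * 10 / 100 + 1 = 17601.  */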
3776 /* Return true if NODE should be cloned just for a parameter removal, possibly
3777 dumping a reason if not. */
3779 static bool
3780 clone_for_param_removal_p (cgraph_node *node)
3782 if (!node->can_change_signature)
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3785 fprintf (dump_file, " Not considering cloning to remove parameters, "
3786 "function cannot change signature.\n");
3787 return false;
3789 if (node->can_be_local_p ())
3791 if (dump_file && (dump_flags & TDF_DETAILS))
3792 fprintf (dump_file, " Not considering cloning to remove parameters, "
3793 "IPA-SRA can do it potentially better.\n");
3794 return false;
3796 return true;
3799 /* Iterate over known values of parameters of NODE and estimate the local
3800 effects in terms of time and size they have. */
3802 static void
3803 estimate_local_effects (struct cgraph_node *node)
3805 ipa_node_params *info = ipa_node_params_sum->get (node);
3806 int count = ipa_get_param_count (info);
3807 bool always_const;
3808 int removable_params_cost;
3810 if (!count || !ipcp_versionable_function_p (node))
3811 return;
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3814 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3816 ipa_auto_call_arg_values avals;
3817 always_const = gather_context_independent_values (info, &avals, true,
3818 &removable_params_cost);
3819 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3820 if (always_const || devirt_bonus
3821 || (removable_params_cost && clone_for_param_removal_p (node)))
3823 struct caller_statistics stats;
3824 ipa_call_estimates estimates;
3826 init_caller_stats (&stats);
3827 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3828 false);
3829 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3830 sreal time = estimates.nonspecialized_time - estimates.time;
3831 time += devirt_bonus;
3832 time += hint_time_bonus (node, estimates);
3833 time += removable_params_cost;
3834 int size = estimates.size - stats.n_calls * removable_params_cost;
3836 if (dump_file)
3837 fprintf (dump_file, " - context independent values, size: %i, "
3838 "time_benefit: %f\n", size, (time).to_double ());
3840 if (size <= 0 || node->local)
3842 info->do_clone_for_all_contexts = true;
3844 if (dump_file)
3845 fprintf (dump_file, " Decided to specialize for all "
3846 "known contexts, code not going to grow.\n");
3848 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3849 stats.count_sum, size))
3851 if (size + overall_size <= get_max_overall_size (node))
3853 info->do_clone_for_all_contexts = true;
3854 overall_size += size;
3856 if (dump_file)
3857 fprintf (dump_file, " Decided to specialize for all "
3858 "known contexts, growth (to %li) deemed "
3859 "beneficial.\n", overall_size);
3861 else if (dump_file && (dump_flags & TDF_DETAILS))
3862 fprintf (dump_file, " Not cloning for all contexts because "
3863 "maximum unit size would be reached with %li.\n",
3864 size + overall_size);
3866 else if (dump_file && (dump_flags & TDF_DETAILS))
3867 fprintf (dump_file, " Not cloning for all contexts because "
3868 "!good_cloning_opportunity_p.\n");
3872 for (int i = 0; i < count; i++)
3874 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3875 ipcp_lattice<tree> *lat = &plats->itself;
3876 ipcp_value<tree> *val;
3878 if (lat->bottom
3879 || !lat->values
3880 || avals.m_known_vals[i])
3881 continue;
3883 for (val = lat->values; val; val = val->next)
3885 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3886 avals.m_known_vals[i] = val->value;
3888 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3889 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3890 emc, val);
3892 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, " - estimates for value ");
3895 print_ipcp_constant_value (dump_file, val->value);
3896 fprintf (dump_file, " for ");
3897 ipa_dump_param (dump_file, info, i);
3898 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3899 val->local_time_benefit.to_double (),
3900 val->local_size_cost);
3903 avals.m_known_vals[i] = NULL_TREE;
3906 for (int i = 0; i < count; i++)
3908 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3910 if (!plats->virt_call)
3911 continue;
3913 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3914 ipcp_value<ipa_polymorphic_call_context> *val;
3916 if (ctxlat->bottom
3917 || !ctxlat->values
3918 || !avals.m_known_contexts[i].useless_p ())
3919 continue;
3921 for (val = ctxlat->values; val; val = val->next)
3923 avals.m_known_contexts[i] = val->value;
3924 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3925 0, val);
3927 if (dump_file && (dump_flags & TDF_DETAILS))
3929 fprintf (dump_file, " - estimates for polymorphic context ");
3930 print_ipcp_constant_value (dump_file, val->value);
3931 fprintf (dump_file, " for ");
3932 ipa_dump_param (dump_file, info, i);
3933 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3934 val->local_time_benefit.to_double (),
3935 val->local_size_cost);
3938 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3941 unsigned all_ctx_len = avals.m_known_aggs.length ();
3942 auto_vec<ipa_argagg_value, 32> all_ctx;
3943 all_ctx.reserve_exact (all_ctx_len);
3944 all_ctx.splice (avals.m_known_aggs);
3945 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3947 unsigned j = 0;
3948 for (int index = 0; index < count; index++)
3950 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3952 if (plats->aggs_bottom || !plats->aggs)
3953 continue;
3955 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3957 ipcp_value<tree> *val;
3958 if (aglat->bottom || !aglat->values
3959 /* If the following is true, the one value is already part of all
3960 context estimations. */
3961 || (!plats->aggs_contain_variable
3962 && aglat->is_single_const ()))
3963 continue;
3965 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3966 while (j < all_ctx_len
3967 && (all_ctx[j].index < index
3968 || (all_ctx[j].index == index
3969 && all_ctx[j].unit_offset < unit_offset)))
3971 avals.m_known_aggs[j] = all_ctx[j];
3972 j++;
3975 for (unsigned k = j; k < all_ctx_len; k++)
3976 avals.m_known_aggs[k+1] = all_ctx[k];
3978 for (val = aglat->values; val; val = val->next)
3980 avals.m_known_aggs[j].value = val->value;
3981 avals.m_known_aggs[j].unit_offset = unit_offset;
3982 avals.m_known_aggs[j].index = index;
3983 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3985 perform_estimation_of_a_value (node, &avals,
3986 removable_params_cost, 0, val);
3988 if (dump_file && (dump_flags & TDF_DETAILS))
3990 fprintf (dump_file, " - estimates for value ");
3991 print_ipcp_constant_value (dump_file, val->value);
3992 fprintf (dump_file, " for ");
3993 ipa_dump_param (dump_file, info, index);
3994 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3995 "]: time_benefit: %g, size: %i\n",
3996 plats->aggs_by_ref ? "ref " : "",
3997 aglat->offset,
3998 val->local_time_benefit.to_double (),
3999 val->local_size_cost);
4007 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
4008 topological sort of values. */
4010 template <typename valtype>
4011 void
4012 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
4014 ipcp_value_source<valtype> *src;
4016 if (cur_val->dfs)
4017 return;
4019 dfs_counter++;
4020 cur_val->dfs = dfs_counter;
4021 cur_val->low_link = dfs_counter;
4023 cur_val->topo_next = stack;
4024 stack = cur_val;
4025 cur_val->on_stack = true;
4027 for (src = cur_val->sources; src; src = src->next)
4028 if (src->val)
4030 if (src->val->dfs == 0)
4032 add_val (src->val);
4033 if (src->val->low_link < cur_val->low_link)
4034 cur_val->low_link = src->val->low_link;
4036 else if (src->val->on_stack
4037 && src->val->dfs < cur_val->low_link)
4038 cur_val->low_link = src->val->dfs;
4041 if (cur_val->dfs == cur_val->low_link)
4043 ipcp_value<valtype> *v, *scc_list = NULL;
4047 v = stack;
4048 stack = v->topo_next;
4049 v->on_stack = false;
4050 v->scc_no = cur_val->dfs;
4052 v->scc_next = scc_list;
4053 scc_list = v;
4055 while (v != cur_val);
4057 cur_val->topo_next = values_topo;
4058 values_topo = cur_val;
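/* The above is in essence Tarjan's SCC algorithm run over the graph of
   dependencies among values.  As a minimal illustration, if value A has a
   source in value B and B has a source in A (e.g. two constants feeding
   each other through mutually recursive calls), both receive the same
   scc_no, are linked together through scc_next and are pushed onto
   values_topo as a single entry.  */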
4062 /* Add all values in lattices associated with NODE to the topological sort if
4063 they are not there yet. */
4065 static void
4066 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
4068 ipa_node_params *info = ipa_node_params_sum->get (node);
4069 int i, count = ipa_get_param_count (info);
4071 for (i = 0; i < count; i++)
4073 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
4074 ipcp_lattice<tree> *lat = &plats->itself;
4075 struct ipcp_agg_lattice *aglat;
4077 if (!lat->bottom)
4079 ipcp_value<tree> *val;
4080 for (val = lat->values; val; val = val->next)
4081 topo->constants.add_val (val);
4084 if (!plats->aggs_bottom)
4085 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4086 if (!aglat->bottom)
4088 ipcp_value<tree> *val;
4089 for (val = aglat->values; val; val = val->next)
4090 topo->constants.add_val (val);
4093 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
4094 if (!ctxlat->bottom)
4096 ipcp_value<ipa_polymorphic_call_context> *ctxval;
4097 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
4098 topo->contexts.add_val (ctxval);
4103 /* Perform one pass of constant propagation along the call graph edges, from
4104 callers to callees (requires topological ordering in TOPO), iterating over
4105 strongly connected components. */
4107 static void
4108 propagate_constants_topo (class ipa_topo_info *topo)
4110 int i;
4112 for (i = topo->nnodes - 1; i >= 0; i--)
4114 unsigned j;
4115 struct cgraph_node *v, *node = topo->order[i];
4116 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
4118 /* First, iteratively propagate within the strongly connected component
4119 until all lattices stabilize. */
4120 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4121 if (v->has_gimple_body_p ())
4123 if (opt_for_fn (v->decl, flag_ipa_cp)
4124 && opt_for_fn (v->decl, optimize))
4125 push_node_to_stack (topo, v);
4126 /* When V is not optimized, we cannot push it to the stack, but
4127 we still need to set the lattices of all its callees to bottom. */
4128 else
4130 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4131 propagate_constants_across_call (cs);
4135 v = pop_node_from_stack (topo);
4136 while (v)
4138 struct cgraph_edge *cs;
4139 class ipa_node_params *info = NULL;
4140 bool self_scc = true;
4142 for (cs = v->callees; cs; cs = cs->next_callee)
4143 if (ipa_edge_within_scc (cs))
4145 cgraph_node *callee = cs->callee->function_symbol ();
4147 if (v != callee)
4148 self_scc = false;
4150 if (!info)
4152 info = ipa_node_params_sum->get (v);
4153 info->node_within_scc = true;
4156 if (propagate_constants_across_call (cs))
4157 push_node_to_stack (topo, callee);
4160 if (info)
4161 info->node_is_self_scc = self_scc;
4163 v = pop_node_from_stack (topo);
4166 /* Afterwards, propagate along edges leading out of the SCC, calculate
4167 the local effects of the discovered constants and add all valid values
4168 to their topological sort. */
4169 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4170 if (v->has_gimple_body_p ()
4171 && opt_for_fn (v->decl, flag_ipa_cp)
4172 && opt_for_fn (v->decl, optimize))
4174 struct cgraph_edge *cs;
4176 estimate_local_effects (v);
4177 add_all_node_vals_to_toposort (v, topo);
4178 for (cs = v->callees; cs; cs = cs->next_callee)
4179 if (!ipa_edge_within_scc (cs))
4180 propagate_constants_across_call (cs);
4182 cycle_nodes.release ();
4186 /* Propagate the estimated effects of individual values along the topological
4187 order from the dependent values to those they depend on. */
4189 template <typename valtype>
4190 void
4191 value_topo_info<valtype>::propagate_effects ()
4193 ipcp_value<valtype> *base;
4194 hash_set<ipcp_value<valtype> *> processed_srcvals;
4196 for (base = values_topo; base; base = base->topo_next)
4198 ipcp_value_source<valtype> *src;
4199 ipcp_value<valtype> *val;
4200 sreal time = 0;
4201 HOST_WIDE_INT size = 0;
4203 for (val = base; val; val = val->scc_next)
4205 time = time + val->local_time_benefit + val->prop_time_benefit;
4206 size = size + val->local_size_cost + val->prop_size_cost;
4209 for (val = base; val; val = val->scc_next)
4211 processed_srcvals.empty ();
4212 for (src = val->sources; src; src = src->next)
4213 if (src->val
4214 && src->cs->maybe_hot_p ())
4216 if (!processed_srcvals.add (src->val))
4218 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4219 if (prop_size < INT_MAX)
4220 src->val->prop_size_cost = prop_size;
4221 else
4222 continue;
4225 int special_factor = 1;
4226 if (val->same_scc (src->val))
4227 special_factor
4228 = opt_for_fn (src->cs->caller->decl,
4229 param_ipa_cp_recursive_freq_factor);
4230 else if (val->self_recursion_generated_p ()
4231 && (src->cs->callee->function_symbol ()
4232 == src->cs->caller))
4234 int max_recur_gen_depth
4235 = opt_for_fn (src->cs->caller->decl,
4236 param_ipa_cp_max_recursive_depth);
4237 special_factor = max_recur_gen_depth
4238 - val->self_recursion_generated_level + 1;
4241 src->val->prop_time_benefit
4242 += time * special_factor * src->cs->sreal_frequency ();
4245 if (size < INT_MAX)
4247 val->prop_time_benefit = time;
4248 val->prop_size_cost = size;
4250 else
4252 val->prop_time_benefit = 0;
4253 val->prop_size_cost = 0;
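/* A sketch with made-up numbers: if an SCC consists of a single value with
   local_time_benefit of 10 and no propagated benefit yet, and it has one
   hot source value reached over an edge with sreal_frequency of 2 (no
   recursion, so special_factor is 1), the source value's
   prop_time_benefit grows by 10 * 1 * 2 = 20.  */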
4259 /* Callback for qsort to sort counts of all edges in descending order. */
4261 static int
4262 compare_edge_profile_counts (const void *a, const void *b)
4264 const profile_count *cnt1 = (const profile_count *) a;
4265 const profile_count *cnt2 = (const profile_count *) b;
4267 if (*cnt1 < *cnt2)
4268 return 1;
4269 if (*cnt1 > *cnt2)
4270 return -1;
4271 return 0;
4275 /* Propagate constants, polymorphic contexts and their effects from the
4276 summaries interprocedurally. */
4278 static void
4279 ipcp_propagate_stage (class ipa_topo_info *topo)
4281 struct cgraph_node *node;
4283 if (dump_file)
4284 fprintf (dump_file, "\n Propagating constants:\n\n");
4286 base_count = profile_count::uninitialized ();
4288 bool compute_count_base = false;
4289 unsigned base_count_pos_percent = 0;
4290 FOR_EACH_DEFINED_FUNCTION (node)
4292 if (node->has_gimple_body_p ()
4293 && opt_for_fn (node->decl, flag_ipa_cp)
4294 && opt_for_fn (node->decl, optimize))
4296 ipa_node_params *info = ipa_node_params_sum->get (node);
4297 determine_versionability (node, info);
4299 unsigned nlattices = ipa_get_param_count (info);
4300 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4301 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4302 initialize_node_lattices (node);
4304 ipa_size_summary *s = ipa_size_summaries->get (node);
4305 if (node->definition && !node->alias && s != NULL)
4306 overall_size += s->self_size;
4307 if (node->count.ipa ().initialized_p ())
4309 compute_count_base = true;
4310 unsigned pos_percent = opt_for_fn (node->decl,
4311 param_ipa_cp_profile_count_base);
4312 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4316 if (compute_count_base)
4318 auto_vec<profile_count> all_edge_counts;
4319 all_edge_counts.reserve_exact (symtab->edges_count);
4320 FOR_EACH_DEFINED_FUNCTION (node)
4321 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4323 profile_count count = cs->count.ipa ();
4324 if (!count.nonzero_p ())
4325 continue;
4327 enum availability avail;
4328 cgraph_node *tgt
4329 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4330 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4331 if (info && info->versionable)
4332 all_edge_counts.quick_push (count);
4335 if (!all_edge_counts.is_empty ())
4337 gcc_assert (base_count_pos_percent <= 100);
4338 all_edge_counts.qsort (compare_edge_profile_counts);
4340 unsigned base_count_pos
4341 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4342 base_count = all_edge_counts[base_count_pos];
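/* For instance, with 500 eligible edges and base_count_pos_percent of
   10, base_count_pos is 50; because the vector is sorted in descending
   order, base_count then corresponds roughly to the 90th percentile of
   the gathered counts.  */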
4344 if (dump_file)
4346 fprintf (dump_file, "\nSelected base_count from %u edges at "
4347 "position %u, arriving at: ", all_edge_counts.length (),
4348 base_count_pos);
4349 base_count.dump (dump_file);
4350 fprintf (dump_file, "\n");
4353 else if (dump_file)
4354 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4355 "continuing as if without profile feedback.\n");
4358 orig_overall_size = overall_size;
4360 if (dump_file)
4361 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4363 propagate_constants_topo (topo);
4364 if (flag_checking)
4365 ipcp_verify_propagated_values ();
4366 topo->constants.propagate_effects ();
4367 topo->contexts.propagate_effects ();
4369 if (dump_file)
4371 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4372 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4376 /* Discover newly direct outgoing edges from NODE which is a new clone with
4377 known KNOWN_CSTS and make them direct. */
4379 static void
4380 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4381 vec<tree> known_csts,
4382 vec<ipa_polymorphic_call_context>
4383 known_contexts,
4384 vec<ipa_argagg_value, va_gc> *aggvals)
4386 struct cgraph_edge *ie, *next_ie;
4387 bool found = false;
4389 for (ie = node->indirect_calls; ie; ie = next_ie)
4391 tree target;
4392 bool speculative;
4394 next_ie = ie->next_callee;
4395 ipa_argagg_value_list avs (aggvals);
4396 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4397 avs, &speculative);
4398 if (target)
4400 bool agg_contents = ie->indirect_info->agg_contents;
4401 bool polymorphic = ie->indirect_info->polymorphic;
4402 int param_index = ie->indirect_info->param_index;
4403 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4404 speculative);
4405 found = true;
4407 if (cs && !agg_contents && !polymorphic)
4409 ipa_node_params *info = ipa_node_params_sum->get (node);
4410 int c = ipa_get_controlled_uses (info, param_index);
4411 if (c != IPA_UNDESCRIBED_USE
4412 && !ipa_get_param_load_dereferenced (info, param_index))
4414 struct ipa_ref *to_del;
4416 c--;
4417 ipa_set_controlled_uses (info, param_index, c);
4418 if (dump_file && (dump_flags & TDF_DETAILS))
4419 fprintf (dump_file, " controlled uses count of param "
4420 "%i bumped down to %i\n", param_index, c);
4421 if (c == 0
4422 && (to_del = node->find_reference (cs->callee, NULL, 0,
4423 IPA_REF_ADDR)))
4425 if (dump_file && (dump_flags & TDF_DETAILS))
4426 fprintf (dump_file, " and even removing its "
4427 "cloning-created reference\n");
4428 to_del->remove_reference ();
4434 /* Turning indirect calls into direct calls will improve the overall summary. */
4435 if (found)
4436 ipa_update_overall_fn_summary (node);
4439 class edge_clone_summary;
4440 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4442 /* Edge clone summary. */
4444 class edge_clone_summary
4446 public:
4447 /* Default constructor. */
4448 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4450 /* Default destructor. */
4451 ~edge_clone_summary ()
4453 if (prev_clone)
4454 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4455 if (next_clone)
4456 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4459 cgraph_edge *prev_clone;
4460 cgraph_edge *next_clone;
4463 class edge_clone_summary_t:
4464 public call_summary <edge_clone_summary *>
4466 public:
4467 edge_clone_summary_t (symbol_table *symtab):
4468 call_summary <edge_clone_summary *> (symtab)
4470 m_initialize_when_cloning = true;
4473 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4474 edge_clone_summary *src_data,
4475 edge_clone_summary *dst_data) final override;
4478 /* Edge duplication hook. */
4480 void
4481 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4482 edge_clone_summary *src_data,
4483 edge_clone_summary *dst_data)
4485 if (src_data->next_clone)
4486 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4487 dst_data->prev_clone = src_edge;
4488 dst_data->next_clone = src_data->next_clone;
4489 src_data->next_clone = dst_edge;
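/* In other words, the new clone is linked in right after the edge it was
   cloned from: if edge O already had a clone C1, i.e. the list was
   O <-> C1, duplicating O into D yields O <-> D <-> C1.  */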
4492 /* Return true if CS calls DEST or its clone for all contexts. When
4493 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4494 edges from/to an all-context clone. */
4496 static bool
4497 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4498 bool allow_recursion_to_clone)
4500 enum availability availability;
4501 cgraph_node *callee = cs->callee->function_symbol (&availability);
4503 if (availability <= AVAIL_INTERPOSABLE)
4504 return false;
4505 if (callee == dest)
4506 return true;
4507 if (!allow_recursion_to_clone && cs->caller == callee)
4508 return false;
4510 ipa_node_params *info = ipa_node_params_sum->get (callee);
4511 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4514 /* Return true if edge CS does bring about the value described by SRC to
4515 DEST_VAL of node DEST or its clone for all contexts. */
4517 static bool
4518 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4519 cgraph_node *dest, ipcp_value<tree> *dest_val)
4521 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4523 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4524 || caller_info->node_dead)
4525 return false;
4527 if (!src->val)
4528 return true;
4530 if (caller_info->ipcp_orig_node)
4532 tree t = NULL_TREE;
4533 if (src->offset == -1)
4534 t = caller_info->known_csts[src->index];
4535 else if (ipcp_transformation *ts
4536 = ipcp_get_transformation_summary (cs->caller))
4538 ipa_argagg_value_list avl (ts);
4539 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4541 return (t != NULL_TREE
4542 && values_equal_for_ipcp_p (src->val->value, t));
4544 else
4546 if (src->val == dest_val)
4547 return true;
4549 struct ipcp_agg_lattice *aglat;
4550 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4551 src->index);
4552 if (src->offset == -1)
4553 return (plats->itself.is_single_const ()
4554 && values_equal_for_ipcp_p (src->val->value,
4555 plats->itself.values->value));
4556 else
4558 if (plats->aggs_bottom || plats->aggs_contain_variable)
4559 return false;
4560 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4561 if (aglat->offset == src->offset)
4562 return (aglat->is_single_const ()
4563 && values_equal_for_ipcp_p (src->val->value,
4564 aglat->values->value));
4566 return false;
4570 /* Return true if edge CS does bring about the value described by SRC to
4571 DST_VAL of node DEST or its clone for all contexts. */
4573 static bool
4574 cgraph_edge_brings_value_p (cgraph_edge *cs,
4575 ipcp_value_source<ipa_polymorphic_call_context> *src,
4576 cgraph_node *dest,
4577 ipcp_value<ipa_polymorphic_call_context> *)
4579 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4581 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4582 || caller_info->node_dead)
4583 return false;
4584 if (!src->val)
4585 return true;
4587 if (caller_info->ipcp_orig_node)
4588 return (caller_info->known_contexts.length () > (unsigned) src->index)
4589 && values_equal_for_ipcp_p (src->val->value,
4590 caller_info->known_contexts[src->index]);
4592 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4593 src->index);
4594 return plats->ctxlat.is_single_const ()
4595 && values_equal_for_ipcp_p (src->val->value,
4596 plats->ctxlat.values->value);
4599 /* Get the next clone in the linked list of clones of an edge. */
4601 static inline struct cgraph_edge *
4602 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4604 edge_clone_summary *s = edge_clone_summaries->get (cs);
4605 return s != NULL ? s->next_clone : NULL;
4608 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4609 of them is viable and hot, return true. In that case, for those that still
4610 hold, add their edge frequency, their number and the cumulative profile
4611 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4612 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4614 template <typename valtype>
4615 static bool
4616 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4617 sreal *freq_sum, int *caller_count,
4618 profile_count *rec_count_sum,
4619 profile_count *nonrec_count_sum)
4621 ipcp_value_source<valtype> *src;
4622 sreal freq = 0;
4623 int count = 0;
4624 profile_count rec_cnt = profile_count::zero ();
4625 profile_count nonrec_cnt = profile_count::zero ();
4626 bool hot = false;
4627 bool non_self_recursive = false;
4629 for (src = val->sources; src; src = src->next)
4631 struct cgraph_edge *cs = src->cs;
4632 while (cs)
4634 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4636 count++;
4637 freq += cs->sreal_frequency ();
4638 hot |= cs->maybe_hot_p ();
4639 if (cs->caller != dest)
4641 non_self_recursive = true;
4642 if (cs->count.ipa ().initialized_p ())
4643 rec_cnt += cs->count.ipa ();
4645 else if (cs->count.ipa ().initialized_p ())
4646 nonrec_cnt += cs->count.ipa ();
4648 cs = get_next_cgraph_edge_clone (cs);
4652 /* If the only edges bringing a value are self-recursive ones, do not bother
4653 evaluating it. */
4654 if (!non_self_recursive)
4655 return false;
4657 *freq_sum = freq;
4658 *caller_count = count;
4659 *rec_count_sum = rec_cnt;
4660 *nonrec_count_sum = nonrec_cnt;
4662 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4664 struct cgraph_edge *cs;
4666 /* A cold non-SCC source edge could still trigger hot recursive execution
4667 of the function. Consider the case hot and rely on the following cost
4668 model computation to further select the right one. */
4669 for (cs = dest->callers; cs; cs = cs->next_caller)
4670 if (cs->caller == dest && cs->maybe_hot_p ())
4671 return true;
4674 return hot;
4677 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4678 to let a non-self-recursive caller be the first element. Thus, we can
4679 simplify intersecting operations on values that arrive from all of these
4680 callers, especially when a self-recursive call exists. Return true if
4681 this kind of adjustment is possible. */
4683 static bool
4684 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4685 cgraph_node *node)
4687 for (unsigned i = 0; i < callers.length (); i++)
4689 cgraph_edge *cs = callers[i];
4691 if (cs->caller != node)
4693 if (i > 0)
4695 callers[i] = callers[0];
4696 callers[0] = cs;
4698 return true;
4701 return false;
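/* For instance, if the callers arrive as {rec1, rec2, outside}, where rec1
   and rec2 are self-recursive edges, the vector is rearranged to
   {outside, rec2, rec1} and true is returned; if all callers are
   self-recursive, nothing is changed and false is returned.  */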
4704 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4705 is assumed their number is known and equal to CALLER_COUNT. */
4707 template <typename valtype>
4708 static vec<cgraph_edge *>
4709 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4710 int caller_count)
4712 ipcp_value_source<valtype> *src;
4713 vec<cgraph_edge *> ret;
4715 ret.create (caller_count);
4716 for (src = val->sources; src; src = src->next)
4718 struct cgraph_edge *cs = src->cs;
4719 while (cs)
4721 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4722 ret.quick_push (cs);
4723 cs = get_next_cgraph_edge_clone (cs);
4727 if (caller_count > 1)
4728 adjust_callers_for_value_intersection (ret, dest);
4730 return ret;
4733 /* Construct a replacement map for a known VALUE of a formal parameter PARM_NUM.
4734 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4735 should be set to true when the reference created for the constant should be
4736 a load one and not an address one because the corresponding parameter p is
4737 only used as *p. */
4739 static struct ipa_replace_map *
4740 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4741 bool force_load_ref)
4743 struct ipa_replace_map *replace_map;
4745 replace_map = ggc_alloc<ipa_replace_map> ();
4746 if (dump_file)
4748 fprintf (dump_file, " replacing ");
4749 ipa_dump_param (dump_file, info, parm_num);
4751 fprintf (dump_file, " with const ");
4752 print_generic_expr (dump_file, value);
4754 if (force_load_ref)
4755 fprintf (dump_file, " - forcing load reference\n");
4756 else
4757 fprintf (dump_file, "\n");
4759 replace_map->parm_num = parm_num;
4760 replace_map->new_tree = value;
4761 replace_map->force_load_ref = force_load_ref;
4762 return replace_map;
4765 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4766 one, otherwise it will be referred to as the original node. */
4768 static void
4769 dump_profile_updates (cgraph_node *node, bool spec)
4771 if (spec)
4772 fprintf (dump_file, " setting count of the specialized node %s to ",
4773 node->dump_name ());
4774 else
4775 fprintf (dump_file, " setting count of the original node %s to ",
4776 node->dump_name ());
4778 node->count.dump (dump_file);
4779 fprintf (dump_file, "\n");
4780 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4782 fprintf (dump_file, " edge to %s has count ",
4783 cs->callee->dump_name ());
4784 cs->count.dump (dump_file);
4785 fprintf (dump_file, "\n");
4789 /* With a partial training run we do not want to assume that the original's
4790 count is zero whenever we redirect all executed edges to the clone. Simply
4791 drop the profile to a local one in this case. In any case, return the new
4792 value. ORIG_NODE is the original node; its count has not been updated yet. */
4794 profile_count
4795 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4797 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4798 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4799 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4800 remainder = remainder.guessed_local ();
4802 return remainder;
4805 /* Structure to sum counts coming from nodes other than the original node and
4806 its clones. */
4808 struct gather_other_count_struct
4810 cgraph_node *orig;
4811 profile_count other_count;
4814 /* Worker callback of call_for_symbol_thunks_and_aliases summing the
4815 profile counts that come from non-self-recursive calls. */
4817 static bool
4818 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4820 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4821 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4822 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4823 desc->other_count += cs->count.ipa ();
4824 return false;
4827 /* Structure to help analyze if we need to boost counts of some clones of some
4828 non-recursive edges to match the new callee count. */
4830 struct desc_incoming_count_struct
4832 cgraph_node *orig;
4833 hash_set <cgraph_edge *> *processed_edges;
4834 profile_count count;
4835 unsigned unproc_orig_rec_edges;
4838 /* Go over edges calling NODE and its thunks and gather information about
4839 incoming counts so that we know if we need to make any adjustments. */
4841 static void
4842 analyze_clone_icoming_counts (cgraph_node *node,
4843 desc_incoming_count_struct *desc)
4845 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4846 if (cs->caller->thunk)
4848 analyze_clone_icoming_counts (cs->caller, desc);
4849 continue;
4851 else
4853 if (cs->count.initialized_p ())
4854 desc->count += cs->count.ipa ();
4855 if (!desc->processed_edges->contains (cs)
4856 && cs->caller->clone_of == desc->orig)
4857 desc->unproc_orig_rec_edges++;
4861 /* If caller edge counts of a clone created for a self-recursive arithmetic
4862 jump function must be adjusted because they come from the "seed" clone
4863 for the first value and so have been excessively scaled back as if they were
4864 not recursive calls, adjust them so that the incoming counts of NODE match
4865 its count. NODE is the node or its thunk. */
4867 static void
4868 adjust_clone_incoming_counts (cgraph_node *node,
4869 desc_incoming_count_struct *desc)
4871 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4872 if (cs->caller->thunk)
4874 adjust_clone_incoming_counts (cs->caller, desc);
4875 profile_count sum = profile_count::zero ();
4876 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4877 if (e->count.initialized_p ())
4878 sum += e->count.ipa ();
4879 cs->count = cs->count.combine_with_ipa_count (sum);
4881 else if (!desc->processed_edges->contains (cs)
4882 && cs->caller->clone_of == desc->orig)
4884 cs->count += desc->count;
4885 if (dump_file)
4887 fprintf (dump_file, " Adjusted count of an incoming edge of "
4888 "a clone %s -> %s to ", cs->caller->dump_name (),
4889 cs->callee->dump_name ());
4890 cs->count.dump (dump_file);
4891 fprintf (dump_file, "\n");
4896 /* When ORIG_NODE has been cloned for values which have been generated for a
4897 self-recursive call as a result of an arithmetic pass-through
4898 jump-functions, adjust its count together with counts of all such clones in
4899 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4901 The function sums the counts of the original node and all its clones that
4902 cannot be attributed to a specific clone because it comes from a
4903 non-recursive edge. This sum is then evenly divided between the clones and
4904 on top of that each one gets all the counts which can be attributed directly
4905 to it. */
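/* A small made-up example: with an IPA count sum of 990 and three clones
   whose directly attributable (non-recursive) incoming counts are 0, 30
   and 60, redist_sum becomes 900 and the clones receive counts 300, 330
   and 360 respectively, which again adds up to the original 990.  */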
4907 static void
4908 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4909 const vec<cgraph_node *> &self_gen_clones)
4911 profile_count redist_sum = orig_node->count.ipa ();
4912 if (!(redist_sum > profile_count::zero ()))
4913 return;
4915 if (dump_file)
4916 fprintf (dump_file, " Updating profile of self recursive clone "
4917 "series\n");
4919 gather_other_count_struct gocs;
4920 gocs.orig = orig_node;
4921 gocs.other_count = profile_count::zero ();
4923 auto_vec <profile_count, 8> other_edges_count;
4924 for (cgraph_node *n : self_gen_clones)
4926 gocs.other_count = profile_count::zero ();
4927 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4928 &gocs, false);
4929 other_edges_count.safe_push (gocs.other_count);
4930 redist_sum -= gocs.other_count;
4933 hash_set<cgraph_edge *> processed_edges;
4934 unsigned i = 0;
4935 for (cgraph_node *n : self_gen_clones)
4937 profile_count orig_count = n->count;
4938 profile_count new_count
4939 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4940 new_count = lenient_count_portion_handling (new_count, orig_node);
4941 n->count = new_count;
4942 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4943 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4945 cs->count = cs->count.apply_scale (new_count, orig_count);
4946 processed_edges.add (cs);
4948 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4949 cs->count = cs->count.apply_scale (new_count, orig_count);
4951 i++;
4954 /* There are still going to be edges to ORIG_NODE that have one or more
4955 clones coming from another node clone in SELF_GEN_CLONES and which we
4956 scaled by the same amount, which means that the total incoming sum of
4957 counts to ORIG_NODE will be too high; scale such edges back. */
4958 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4960 if (cs->callee->ultimate_alias_target () == orig_node)
4962 unsigned den = 0;
4963 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4964 if (e->callee->ultimate_alias_target () == orig_node
4965 && processed_edges.contains (e))
4966 den++;
4967 if (den > 0)
4968 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4969 if (e->callee->ultimate_alias_target () == orig_node
4970 && processed_edges.contains (e))
4971 e->count /= den;
4975 /* Edges from the seeds of the values generated for arithmetic jump-functions
4976 along self-recursive edges are likely to have fairly low counts, and so
4977 edges from them to nodes in the self_gen_clones do not correspond to the
4978 artificially distributed count of the nodes; the total sum of incoming
4979 edges to some clones might therefore be too low. Detect this situation and
4980 correct it. */
4981 for (cgraph_node *n : self_gen_clones)
4983 if (!(n->count.ipa () > profile_count::zero ()))
4984 continue;
4986 desc_incoming_count_struct desc;
4987 desc.orig = orig_node;
4988 desc.processed_edges = &processed_edges;
4989 desc.count = profile_count::zero ();
4990 desc.unproc_orig_rec_edges = 0;
4991 analyze_clone_icoming_counts (n, &desc);
4993 if (n->count.differs_from_p (desc.count))
4995 if (n->count > desc.count
4996 && desc.unproc_orig_rec_edges > 0)
4998 desc.count = n->count - desc.count;
4999 desc.count /= desc.unproc_orig_rec_edges;
5000 adjust_clone_incoming_counts (n, &desc);
5002 else if (dump_file)
5003 fprintf (dump_file,
5004 " Unable to fix up incoming counts for %s.\n",
5005 n->dump_name ());
5009 if (dump_file)
5010 for (cgraph_node *n : self_gen_clones)
5011 dump_profile_updates (n, n != orig_node);
5012 return;
5015 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
5016 their profile information to reflect this. This function should not be used
5017 for clones generated for arithmetic pass-through jump functions on a
5018 self-recursive call graph edge, that situation is handled by
5019 update_counts_for_self_gen_clones. */
5021 static void
5022 update_profiling_info (struct cgraph_node *orig_node,
5023 struct cgraph_node *new_node)
5025 struct caller_statistics stats;
5026 profile_count new_sum;
5027 profile_count remainder, orig_node_count = orig_node->count.ipa ();
5029 if (!(orig_node_count > profile_count::zero ()))
5030 return;
5032 if (dump_file)
5034 fprintf (dump_file, " Updating profile from original count: ");
5035 orig_node_count.dump (dump_file);
5036 fprintf (dump_file, "\n");
5039 init_caller_stats (&stats, new_node);
5040 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
5041 false);
5042 new_sum = stats.count_sum;
5044 bool orig_edges_processed = false;
5045 if (new_sum > orig_node_count)
5047 /* TODO: Profile has already gone astray, keep what we have but lower it
5048 to global0 category. */
5049 remainder = orig_node->count.global0 ();
5051 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5052 cs->count = cs->count.global0 ();
5053 for (cgraph_edge *cs = orig_node->indirect_calls;
5055 cs = cs->next_callee)
5056 cs->count = cs->count.global0 ();
5057 orig_edges_processed = true;
5059 else if (stats.rec_count_sum.nonzero_p ())
5061 int new_nonrec_calls = stats.n_nonrec_calls;
5062 /* There are self-recursive edges which are likely to bring in the
5063 majority of calls but which we must divide between the original and
5064 new node. */
5065 init_caller_stats (&stats, orig_node);
5066 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
5067 &stats, false);
5068 int orig_nonrec_calls = stats.n_nonrec_calls;
5069 profile_count orig_nonrec_call_count = stats.count_sum;
5071 if (orig_node->local)
5073 if (!orig_nonrec_call_count.nonzero_p ())
5075 if (dump_file)
5076 fprintf (dump_file, " The original is local and the only "
5077 "incoming edges from non-dead callers with nonzero "
5078 "counts are self-recursive, assuming it is cold.\n");
5079 /* The NEW_NODE count and counts of all its outgoing edges
5080 are still unmodified copies of ORIG_NODE's. Just clear
5081 the latter and bail out. */
5082 profile_count zero;
5083 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5084 zero = profile_count::zero ().guessed_local ();
5085 else
5086 zero = profile_count::adjusted_zero ();
5087 orig_node->count = zero;
5088 for (cgraph_edge *cs = orig_node->callees;
5090 cs = cs->next_callee)
5091 cs->count = zero;
5092 for (cgraph_edge *cs = orig_node->indirect_calls;
5094 cs = cs->next_callee)
5095 cs->count = zero;
5096 return;
5099 else
5101 /* Let's behave as if there was another caller that accounts for all
5102 the calls that were either indirect or from other compilation
5103 units. */
5104 orig_nonrec_calls++;
5105 profile_count pretend_caller_count
5106 = (orig_node_count - new_sum - orig_nonrec_call_count
5107 - stats.rec_count_sum);
5108 orig_nonrec_call_count += pretend_caller_count;
5111 /* Divide all "unexplained" counts roughly proportionally to sums of
5112 counts of non-recursive calls.
5114 We put rather arbitrary limits on how many counts we claim because the
5115 sum of non-self-recursive incoming counts is only a rough guideline
5116 and there are cases (such as mcf) where using it blindly just takes
5117 too many. And if lattices are considered in the opposite order we
5118 could also take too few. */
5119 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5121 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5122 profile_count new_part
5123 = MAX (MIN (unexp.apply_scale (new_sum,
5124 new_sum + orig_nonrec_call_count),
5125 unexp.apply_scale (limit_den - 1, limit_den)),
5126 unexp.apply_scale (new_nonrec_calls, limit_den));
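/* With made-up numbers: for unexp = 1000, new_sum = 300,
   orig_nonrec_call_count = 100 and two non-recursive calls on each side
   (limit_den = 8), the proportional share is 1000 * 300 / 400 = 750, the
   upper bound is 1000 * 7 / 8 = 875, the lower bound is 1000 * 2 / 8 = 250,
   and so new_part is 750.  */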
5127 if (dump_file)
5129 fprintf (dump_file, " Claiming ");
5130 new_part.dump (dump_file);
5131 fprintf (dump_file, " of unexplained ");
5132 unexp.dump (dump_file);
5133 fprintf (dump_file, " counts because of self-recursive "
5134 "calls\n");
5136 new_sum += new_part;
5137 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5138 orig_node);
5140 else
5141 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5142 orig_node);
5144 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5145 new_node->count = new_sum;
5146 orig_node->count = remainder;
5148 profile_count orig_new_node_count = orig_node_count;
5149 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5150 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5151 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5152 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5153 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5155 if (!orig_edges_processed)
5157 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5158 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5159 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5160 for (cgraph_edge *cs = orig_node->indirect_calls;
5162 cs = cs->next_callee)
5163 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5166 if (dump_file)
5168 dump_profile_updates (new_node, true);
5169 dump_profile_updates (orig_node, false);
5173 /* Update the respective profile of specialized NEW_NODE and the original
5174 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5175 have been redirected to the specialized version. */
5177 static void
5178 update_specialized_profile (struct cgraph_node *new_node,
5179 struct cgraph_node *orig_node,
5180 profile_count redirected_sum)
5182 struct cgraph_edge *cs;
5183 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
5185 if (dump_file)
5187 fprintf (dump_file, " the sum of counts of redirected edges is ");
5188 redirected_sum.dump (dump_file);
5189 fprintf (dump_file, "\n old ipa count of the original node is ");
5190 orig_node_count.dump (dump_file);
5191 fprintf (dump_file, "\n");
5193 if (!(orig_node_count > profile_count::zero ()))
5194 return;
5196 new_node_count = new_node->count;
5197 new_node->count += redirected_sum;
5198 orig_node->count
5199 = lenient_count_portion_handling (orig_node->count - redirected_sum,
5200 orig_node);
5202 for (cs = new_node->callees; cs; cs = cs->next_callee)
5203 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5205 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5207 profile_count dec = cs->count.apply_scale (redirected_sum,
5208 orig_node_count);
5209 cs->count -= dec;
5212 if (dump_file)
5214 dump_profile_updates (new_node, true);
5215 dump_profile_updates (orig_node, false);
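/* For example, if NEW_NODE's count was 200 before the update and edges
   with a cumulative count of 100 are redirected to it, each of its
   outgoing edge counts is scaled up by 100/200, i.e. grows by half, while
   the corresponding portions are subtracted from ORIG_NODE's outgoing
   edges.  */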
5219 static void adjust_references_in_caller (cgraph_edge *cs,
5220 symtab_node *symbol, int index);
5222 /* Simple structure to pass a symbol and index (with same meaning as parameters
5223 of adjust_references_in_caller) through a void* parameter of a
5224 call_for_symbol_thunks_and_aliases callback. */
5225 struct symbol_and_index_together
5227 symtab_node *symbol;
5228 int index;
5231 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5232 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5233 static bool
5234 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5236 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5237 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5238 if (!cs->caller->thunk)
5239 adjust_references_in_caller (cs, pack->symbol, pack->index);
5240 return false;
5243 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5244 variable which is only dereferenced and which is represented by SYMBOL. See
5245 if we can remove the ADDR reference in callers associated with the call. */
5247 static void
5248 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5250 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5251 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5252 if (jfunc->type == IPA_JF_CONST)
5254 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5255 cs->lto_stmt_uid,
5256 IPA_REF_ADDR);
5257 if (!to_del)
5258 return;
5259 to_del->remove_reference ();
5260 ipa_zap_jf_refdesc (jfunc);
5261 if (dump_file)
5262 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5263 cs->caller->dump_name (), symbol->dump_name ());
5264 return;
5267 if (jfunc->type != IPA_JF_PASS_THROUGH
5268 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
5269 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
5270 return;
5272 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5273 cgraph_node *caller = cs->caller;
5274 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5275 /* TODO: This consistency check may be too big and not really
5276 that useful. Consider removing it. */
5277 tree cst;
5278 if (caller_info->ipcp_orig_node)
5279 cst = caller_info->known_csts[fidx];
5280 else
5282 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5283 gcc_assert (lat->is_single_const ());
5284 cst = lat->values->value;
5286 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5287 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5288 == symbol));
5290 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5291 if (cuses == IPA_UNDESCRIBED_USE)
5292 return;
5293 gcc_assert (cuses > 0);
5294 cuses--;
5295 ipa_set_controlled_uses (caller_info, fidx, cuses);
5296 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5297 if (dump_file && (dump_flags & TDF_DETAILS))
5298 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5299 "to %i.\n", fidx, caller->dump_name (), cuses);
5300 if (cuses)
5301 return;
5303 if (caller_info->ipcp_orig_node)
5305 /* The cloning machinery has created a reference here; we need to either
5306 remove it or change it to a load one. */
5307 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5308 if (to_del)
5310 to_del->remove_reference ();
5311 if (dump_file)
5312 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5313 cs->caller->dump_name (), symbol->dump_name ());
5314 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5316 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5317 if (dump_file)
5318 fprintf (dump_file,
5319 " ...and replaced it with LOAD one.\n");
5324 symbol_and_index_together pack;
5325 pack.symbol = symbol;
5326 pack.index = fidx;
5327 if (caller->can_change_signature)
5328 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5329 &pack, true);
5333 /* Return true if we would like to remove a parameter from NODE when cloning it
5334 with KNOWN_CSTS scalar constants. */
5336 static bool
5337 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5339 auto_vec<bool, 16> surviving;
5340 bool filled_vec = false;
5341 ipa_node_params *info = ipa_node_params_sum->get (node);
5342 int i, count = ipa_get_param_count (info);
5344 for (i = 0; i < count; i++)
5346 if (!known_csts[i] && ipa_is_param_used (info, i))
5347 continue;
5349 if (!filled_vec)
5351 clone_info *info = clone_info::get (node);
5352 if (!info || !info->param_adjustments)
5353 return true;
5354 info->param_adjustments->get_surviving_params (&surviving);
5355 filled_vec = true;
5357 if (surviving.length () > (unsigned) i && surviving[i])
5358 return true;
5360 return false;
5363 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5364 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5365 redirect all edges in CALLERS to it. */
5367 static struct cgraph_node *
5368 create_specialized_node (struct cgraph_node *node,
5369 vec<tree> known_csts,
5370 vec<ipa_polymorphic_call_context> known_contexts,
5371 vec<ipa_argagg_value, va_gc> *aggvals,
5372 vec<cgraph_edge *> &callers)
5374 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5375 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5376 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5377 struct cgraph_node *new_node;
5378 int i, count = ipa_get_param_count (info);
5379 clone_info *cinfo = clone_info::get (node);
5380 ipa_param_adjustments *old_adjustments = cinfo
5381 ? cinfo->param_adjustments : NULL;
5382 ipa_param_adjustments *new_adjustments;
5383 gcc_assert (!info->ipcp_orig_node);
5384 gcc_assert (node->can_change_signature
5385 || !old_adjustments);
5387 if (old_adjustments)
5389 /* At the moment all IPA optimizations should use the number of
5390 parameters of the prevailing decl as the m_always_copy_start.
5391 Handling any other value would complicate the code below, so for the
5392 time being let's only assert it is so. */
5393 gcc_assert (old_adjustments->m_always_copy_start == count
5394 || old_adjustments->m_always_copy_start < 0);
5395 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5396 for (i = 0; i < old_adj_count; i++)
5398 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5399 if (!node->can_change_signature
5400 || old_adj->op != IPA_PARAM_OP_COPY
5401 || (!known_csts[old_adj->base_index]
5402 && ipa_is_param_used (info, old_adj->base_index)))
5404 ipa_adjusted_param new_adj = *old_adj;
5406 new_adj.prev_clone_adjustment = true;
5407 new_adj.prev_clone_index = i;
5408 vec_safe_push (new_params, new_adj);
5411 bool skip_return = old_adjustments->m_skip_return;
5412 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5413 ipa_param_adjustments (new_params, count,
5414 skip_return));
5416 else if (node->can_change_signature
5417 && want_remove_some_param_p (node, known_csts))
5419 ipa_adjusted_param adj;
5420 memset (&adj, 0, sizeof (adj));
5421 adj.op = IPA_PARAM_OP_COPY;
5422 for (i = 0; i < count; i++)
5423 if (!known_csts[i] && ipa_is_param_used (info, i))
5425 adj.base_index = i;
5426 adj.prev_clone_index = i;
5427 vec_safe_push (new_params, adj);
5429 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5430 ipa_param_adjustments (new_params, count, false));
5432 else
5433 new_adjustments = NULL;
5435 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5436 for (i = callers.length () - 1; i >= 0; i--)
5438 cgraph_edge *cs = callers[i];
5439 if (cs->caller == node)
5441 self_recursive_calls.safe_push (cs);
5442 callers.unordered_remove (i);
5445 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5446 for (i = 0; i < count; i++)
5448 tree t = known_csts[i];
5449 if (!t)
5450 continue;
5452 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5454 bool load_ref = false;
5455 symtab_node *ref_symbol;
5456 if (TREE_CODE (t) == ADDR_EXPR)
5458 tree base = get_base_address (TREE_OPERAND (t, 0));
5459 if (TREE_CODE (base) == VAR_DECL
5460 && ipa_get_controlled_uses (info, i) == 0
5461 && ipa_get_param_load_dereferenced (info, i)
5462 && (ref_symbol = symtab_node::get (base)))
5464 load_ref = true;
5465 if (node->can_change_signature)
5466 for (cgraph_edge *caller : callers)
5467 adjust_references_in_caller (caller, ref_symbol, i);
5471 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5472 if (replace_map)
5473 vec_safe_push (replace_trees, replace_map);
5476 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5477 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5478 node->decl)));
5479 new_node = node->create_virtual_clone (callers, replace_trees,
5480 new_adjustments, "constprop",
5481 suffix_counter);
5482 suffix_counter++;
5484 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5485 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5487 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5488 /* Cloned edges can disappear during cloning as speculation can be
5489 resolved; check that we have one and that it comes from the last
5490 cloning. */
5491 if (cs && cs->caller == new_node)
5492 cs->redirect_callee_duplicating_thunks (new_node);
5493 /* Any future code that would make more than one clone of an outgoing
5494 edge would confuse this mechanism, so let's check that does not
5495 happen. */
5496 gcc_checking_assert (!cs
5497 || !get_next_cgraph_edge_clone (cs)
5498 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5500 if (have_self_recursive_calls)
5501 new_node->expand_all_artificial_thunks ();
5503 ipa_set_node_agg_value_chain (new_node, aggvals);
5504 for (const ipa_argagg_value &av : aggvals)
5505 new_node->maybe_create_reference (av.value, NULL);
5507 if (dump_file && (dump_flags & TDF_DETAILS))
5509 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5510 if (known_contexts.exists ())
5512 for (i = 0; i < count; i++)
5513 if (!known_contexts[i].useless_p ())
5515 fprintf (dump_file, " known ctx %i is ", i);
5516 known_contexts[i].dump (dump_file);
5519 if (aggvals)
5521 fprintf (dump_file, " Aggregate replacements:");
5522 ipa_argagg_value_list avs (aggvals);
5523 avs.dump (dump_file);
5527 new_info = ipa_node_params_sum->get (new_node);
5528 new_info->ipcp_orig_node = node;
5529 new_node->ipcp_clone = true;
5530 new_info->known_csts = known_csts;
5531 new_info->known_contexts = known_contexts;
5533 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5534 aggvals);
5536 return new_node;
5539 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5540 pass-through function to itself when the cgraph_node involved is not an
5541 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5542 no-operation pass-through. */
5544 static bool
5545 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5546 bool simple = true)
5548 enum availability availability;
5549 if (cs->caller == cs->callee->function_symbol (&availability)
5550 && availability > AVAIL_INTERPOSABLE
5551 && jfunc->type == IPA_JF_PASS_THROUGH
5552 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5553 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5554 && ipa_node_params_sum->get (cs->caller)
5555 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5556 return true;
5557 return false;
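/* For example, in

     void
     foo (int i)
     {
       ...
       foo (i);
     }

   the jump function describing the argument of the recursive call to foo
   is a simple no-operation pass-through of parameter 0, for which this
   predicate returns true.  */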
5560 /* Return true if JFUNC, which describes a part of an aggregate represented or
5561 pointed to by the i-th parameter of call CS, is a pass-through function to
5562 itself when the cgraph_node involved is not an IPA-CP clone. When
5563 SIMPLE is true, further check if JFUNC is a simple no-operation
5564 pass-through. */
5566 static bool
5567 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5568 const ipa_agg_jf_item *jfunc,
5569 int i, bool simple = true)
5571 enum availability availability;
5572 if (cs->caller == cs->callee->function_symbol (&availability)
5573 && availability > AVAIL_INTERPOSABLE
5574 && jfunc->jftype == IPA_JF_LOAD_AGG
5575 && jfunc->offset == jfunc->value.load_agg.offset
5576 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5577 && jfunc->value.pass_through.formal_id == i
5578 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5579 && ipa_node_params_sum->get (cs->caller)
5580 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5581 return true;
5582 return false;
5585 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5586 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
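/* For example, if propagation alone could not determine the first
   parameter but every edge in CALLERS is known to pass the constant 4 in
   it, 4 can be stored into KNOWN_CSTS[0] for the clone created for this
   subset of callers.  */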
5588 static void
5589 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5590 vec<tree> &known_csts,
5591 const vec<cgraph_edge *> &callers)
5593 ipa_node_params *info = ipa_node_params_sum->get (node);
5594 int i, count = ipa_get_param_count (info);
5596 for (i = 0; i < count; i++)
5598 struct cgraph_edge *cs;
5599 tree newval = NULL_TREE;
5600 int j;
5601 bool first = true;
5602 tree type = ipa_get_type (info, i);
5604 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5605 continue;
5607 FOR_EACH_VEC_ELT (callers, j, cs)
5609 struct ipa_jump_func *jump_func;
5610 tree t;
5612 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5613 if (!args
5614 || i >= ipa_get_cs_argument_count (args)
5615 || (i == 0
5616 && call_passes_through_thunk (cs)))
5618 newval = NULL_TREE;
5619 break;
5621 jump_func = ipa_get_ith_jump_func (args, i);
5623 /* Besides a simple pass-through jump function, an arithmetic jump
5624 function can also introduce an argument-direct-pass-through for a
5625 self-feeding recursive call. For example,
5627 fn (int i)
5629 fn (i & 1);
5632 Given that i is 0, recursive propagation via (i & 1) also gets
5633 0. */
5634 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5636 gcc_assert (newval);
5637 t = ipa_get_jf_arith_result (
5638 ipa_get_jf_pass_through_operation (jump_func),
5639 newval,
5640 ipa_get_jf_pass_through_operand (jump_func),
5641 type);
5643 else
5644 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5645 jump_func, type);
5646 if (!t
5647 || (newval
5648 && !values_equal_for_ipcp_p (t, newval))
5649 || (!first && !newval))
5651 newval = NULL_TREE;
5652 break;
5654 else
5655 newval = t;
5656 first = false;
5659 if (newval)
5661 if (dump_file && (dump_flags & TDF_DETAILS))
5663 fprintf (dump_file, " adding an extra known scalar value ");
5664 print_ipcp_constant_value (dump_file, newval);
5665 fprintf (dump_file, " for ");
5666 ipa_dump_param (dump_file, info, i);
5667 fprintf (dump_file, "\n");
5670 known_csts[i] = newval;
5675 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5676 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5677 CALLERS. */
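/* This is the polymorphic-context counterpart of
   find_more_scalar_values_for_callers_subset: a context is recorded only
   when the meet of the contexts arriving over all edges in CALLERS is
   still useful.  */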
5679 static void
5680 find_more_contexts_for_caller_subset (cgraph_node *node,
5681 vec<ipa_polymorphic_call_context>
5682 *known_contexts,
5683 const vec<cgraph_edge *> &callers)
5685 ipa_node_params *info = ipa_node_params_sum->get (node);
5686 int i, count = ipa_get_param_count (info);
5688 for (i = 0; i < count; i++)
5690 cgraph_edge *cs;
5692 if (ipa_get_poly_ctx_lat (info, i)->bottom
5693 || (known_contexts->exists ()
5694 && !(*known_contexts)[i].useless_p ()))
5695 continue;
5697 ipa_polymorphic_call_context newval;
5698 bool first = true;
5699 int j;
5701 FOR_EACH_VEC_ELT (callers, j, cs)
5703 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5704 if (!args
5705 || i >= ipa_get_cs_argument_count (args))
5706 return;
5707 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5708 ipa_polymorphic_call_context ctx;
5709 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5710 cs, i, jfunc);
5711 if (first)
5713 newval = ctx;
5714 first = false;
5716 else
5717 newval.meet_with (ctx);
5718 if (newval.useless_p ())
5719 break;
5722 if (!newval.useless_p ())
5724 if (dump_file && (dump_flags & TDF_DETAILS))
5726 fprintf (dump_file, " adding an extra known polymorphic "
5727 "context ");
5728 print_ipcp_constant_value (dump_file, newval);
5729 fprintf (dump_file, " for ");
5730 ipa_dump_param (dump_file, info, i);
5731 fprintf (dump_file, "\n");
5734 if (!known_contexts->exists ())
5735 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5736 true);
5737 (*known_contexts)[i] = newval;
5743 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5744 RES. If INTERIM is non-NULL, it contains the current interim state of
5745 collected aggregate values which can be used to compute values passed over
5746 self-recursive edges.
5748 This is basically one iteration of push_agg_values_from_edge over one
5749 parameter, which allows for simpler early returns. */
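/* Depending on the jump function describing the argument, the pushed
   values can come from what is known about the caller (its lattices or,
   when the caller is an IPA-CP clone, its transformation summary), from
   INTERIM for self-recursive edges, or from constants recorded in the
   jump function itself.  */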
5751 static void
5752 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5753 vec<ipa_argagg_value> *res,
5754 const ipa_argagg_value_list *interim)
5756 bool agg_values_from_caller = false;
5757 bool agg_jf_preserved = false;
5758 unsigned unit_delta = UINT_MAX;
5759 int src_idx = -1;
5760 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5761 index);
5763 if (jfunc->type == IPA_JF_PASS_THROUGH
5764 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5766 agg_values_from_caller = true;
5767 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5768 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5769 unit_delta = 0;
5771 else if (jfunc->type == IPA_JF_ANCESTOR
5772 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5774 agg_values_from_caller = true;
5775 agg_jf_preserved = true;
5776 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5777 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5780 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5781 if (agg_values_from_caller)
5783 if (caller_info->ipcp_orig_node)
5785 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5786 ipcp_transformation *ts
5787 = ipcp_get_transformation_summary (cs->caller);
5788 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5789 ipcp_param_lattices *orig_plats
5790 = ipa_get_parm_lattices (orig_info, src_idx);
5791 if (ts
5792 && orig_plats->aggs
5793 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5795 ipa_argagg_value_list src (ts);
5796 src.push_adjusted_values (src_idx, index, unit_delta, res);
5797 return;
5800 else
5802 ipcp_param_lattices *src_plats
5803 = ipa_get_parm_lattices (caller_info, src_idx);
5804 if (src_plats->aggs
5805 && !src_plats->aggs_bottom
5806 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5808 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5810 interim->push_adjusted_values (src_idx, index, unit_delta,
5811 res);
5812 return;
5814 if (!src_plats->aggs_contain_variable)
5816 push_agg_values_from_plats (src_plats, index, unit_delta,
5817 res);
5818 return;
5824 if (!jfunc->agg.items)
5825 return;
5826 bool first = true;
5827 unsigned prev_unit_offset = 0;
5828 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5830 tree value, srcvalue;
5831 /* Besides a simple pass-through aggregate jump function, an arithmetic
5832 aggregate jump function can also bring the same aggregate value as the
5833 parameter passed in for a self-feeding recursive call. For example,
5835 fn (int *i)
5837 int j = *i & 1;
5838 fn (&j);
5841 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5842 if (interim
5843 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5844 && (srcvalue = interim->get_value (index,
5845 agg_jf.offset / BITS_PER_UNIT)))
5846 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5847 srcvalue,
5848 agg_jf.value.pass_through.operand,
5849 agg_jf.type);
5850 else
5851 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5852 &agg_jf);
5853 if (value)
5855 struct ipa_argagg_value iav;
5856 iav.value = value;
5857 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5858 iav.index = index;
5859 iav.by_ref = jfunc->agg.by_ref;
5861 gcc_assert (first
5862 || iav.unit_offset > prev_unit_offset);
5863 prev_unit_offset = iav.unit_offset;
5864 first = false;
5866 res->safe_push (iav);
5869 return;
5872 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5873 description of ultimate callee of CS or the one it was cloned from (the
5874 summary where lattices are). If INTERIM is non-NULL, it contains the
5875 current interim state of collected aggregate values which can be used to
5876 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5877 is true) and to skip values which clearly will not be part of intersection
5878 with INTERIM. */
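/* Note that when INTERIM is provided, parameter indices for which it
   holds no value are skipped entirely, since the subsequent intersection
   with INTERIM would discard anything collected for them anyway.  */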
5880 static void
5881 push_agg_values_from_edge (struct cgraph_edge *cs,
5882 ipa_node_params *dest_info,
5883 vec<ipa_argagg_value> *res,
5884 const ipa_argagg_value_list *interim,
5885 bool optimize_self_recursion)
5887 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5888 if (!args)
5889 return;
5891 int count = MIN (ipa_get_param_count (dest_info),
5892 ipa_get_cs_argument_count (args));
5894 unsigned interim_index = 0;
5895 for (int index = 0; index < count; index++)
5897 if (interim)
5899 while (interim_index < interim->m_elts.size ()
5900 && interim->m_elts[interim_index].value
5901 && interim->m_elts[interim_index].index < index)
5902 interim_index++;
5903 if (interim_index >= interim->m_elts.size ()
5904 || interim->m_elts[interim_index].index > index)
5905 continue;
5908 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5909 if (!ipa_is_param_used (dest_info, index)
5910 || plats->aggs_bottom)
5911 continue;
5912 push_agg_values_for_index_from_edge (cs, index, res,
5913 optimize_self_recursion ? interim
5914 : NULL);
5919 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5920 from all of them. Return nullptr if there are none. */
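/* The values arriving over the first edge (which gather_edges_for_value
   tries to make a non-self-recursive one) seed an interim vector that is
   then intersected with the values arriving over each remaining edge, so
   only values common to all CALLERS survive.  */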
5922 static struct vec<ipa_argagg_value, va_gc> *
5923 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5924 const vec<cgraph_edge *> &callers)
5926 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5927 if (dest_info->ipcp_orig_node)
5928 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5930 /* gather_edges_for_value puts a non-recursive call into the first element of
5931 callers if it can. */
5932 auto_vec<ipa_argagg_value, 32> interim;
5933 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5935 unsigned valid_entries = interim.length ();
5936 if (!valid_entries)
5937 return nullptr;
5939 unsigned caller_count = callers.length ();
5940 for (unsigned i = 1; i < caller_count; i++)
5942 auto_vec<ipa_argagg_value, 32> last;
5943 ipa_argagg_value_list avs (&interim);
5944 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5946 valid_entries = intersect_argaggs_with (interim, last);
5947 if (!valid_entries)
5948 return nullptr;
5951 vec<ipa_argagg_value, va_gc> *res = NULL;
5952 vec_safe_reserve_exact (res, valid_entries);
5953 for (const ipa_argagg_value &av : interim)
5954 if (av.value)
5955 res->quick_push (av);
5956 gcc_checking_assert (res->length () == valid_entries);
5957 return res;
5960 /* Determine whether CS also brings all scalar values that the NODE is
5961 specialized for. */
5963 static bool
5964 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5965 struct cgraph_node *node)
5967 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5968 int count = ipa_get_param_count (dest_info);
5969 class ipa_node_params *caller_info;
5970 class ipa_edge_args *args;
5971 int i;
5973 caller_info = ipa_node_params_sum->get (cs->caller);
5974 args = ipa_edge_args_sum->get (cs);
5975 for (i = 0; i < count; i++)
5977 struct ipa_jump_func *jump_func;
5978 tree val, t;
5980 val = dest_info->known_csts[i];
5981 if (!val)
5982 continue;
5984 if (i >= ipa_get_cs_argument_count (args))
5985 return false;
5986 jump_func = ipa_get_ith_jump_func (args, i);
5987 t = ipa_value_from_jfunc (caller_info, jump_func,
5988 ipa_get_type (dest_info, i));
5989 if (!t || !values_equal_for_ipcp_p (val, t))
5990 return false;
5992 return true;
5995 /* Determine whether CS also brings all aggregate values that NODE is
5996 specialized for. */
5998 static bool
5999 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
6000 struct cgraph_node *node)
6002 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
6003 if (!ts || vec_safe_is_empty (ts->m_agg_values))
6004 return true;
6006 const ipa_argagg_value_list existing (ts->m_agg_values);
6007 auto_vec<ipa_argagg_value, 32> edge_values;
6008 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
6009 gcc_checking_assert (dest_info->ipcp_orig_node);
6010 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
6011 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
6012 const ipa_argagg_value_list avl (&edge_values);
6013 return avl.superset_of_p (existing);
6016 /* Given an original NODE and a VAL for which we have already created a
6017 specialized clone, look whether there are incoming edges that still lead
6018 into the old node but now also bring the requested value and also conform to
6019 all other criteria such that they can be redirected to the specialized
6020 node. This function can therefore redirect the final edge in an SCC. */
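/* Edges within an SCC may only start bringing the requested value after
   other members of the SCC have been specialized, which is why this
   re-check after cloning can be the step that redirects the last edge of
   a cycle.  */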
6022 template <typename valtype>
6023 static void
6024 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
6026 ipcp_value_source<valtype> *src;
6027 profile_count redirected_sum = profile_count::zero ();
6029 for (src = val->sources; src; src = src->next)
6031 struct cgraph_edge *cs = src->cs;
6032 while (cs)
6034 if (cgraph_edge_brings_value_p (cs, src, node, val)
6035 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
6036 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
6038 if (dump_file)
6039 fprintf (dump_file, " - adding an extra caller %s of %s\n",
6040 cs->caller->dump_name (),
6041 val->spec_node->dump_name ());
6043 cs->redirect_callee_duplicating_thunks (val->spec_node);
6044 val->spec_node->expand_all_artificial_thunks ();
6045 if (cs->count.ipa ().initialized_p ())
6046 redirected_sum = redirected_sum + cs->count.ipa ();
6048 cs = get_next_cgraph_edge_clone (cs);
6052 if (redirected_sum.nonzero_p ())
6053 update_specialized_profile (val->spec_node, node, redirected_sum);
6056 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
6058 static bool
6059 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
6061 ipa_polymorphic_call_context *ctx;
6062 int i;
6064 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
6065 if (!ctx->useless_p ())
6066 return true;
6067 return false;
6070 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */
6072 static vec<ipa_polymorphic_call_context>
6073 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
6075 if (known_contexts_useful_p (known_contexts))
6076 return known_contexts.copy ();
6077 else
6078 return vNULL;
6081 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
6082 according to VAL and INDEX. Also store a copy of the known contexts into
6083 KNOWN_CONTEXTS if at least one of them is useful. */
6085 static void
6086 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6087 vec<tree> *known_csts,
6088 vec<ipa_polymorphic_call_context> *known_contexts,
6089 ipcp_value<tree> *val, int index)
6091 *known_csts = avals->m_known_vals.copy ();
6092 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6093 (*known_csts)[index] = val->value;
6096 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6097 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6098 INDEX. */
6100 static void
6101 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6102 vec<tree> *known_csts,
6103 vec<ipa_polymorphic_call_context> *known_contexts,
6104 ipcp_value<ipa_polymorphic_call_context> *val,
6105 int index)
6107 *known_csts = avals->m_known_vals.copy ();
6108 *known_contexts = avals->m_known_contexts.copy ();
6109 (*known_contexts)[index] = val->value;
6112 /* Return true if OFFSET indicates this was not an aggregate value or there is
6113 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6114 AGGVALS list. */
6116 DEBUG_FUNCTION bool
6117 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6118 int index, HOST_WIDE_INT offset, tree value)
6120 if (offset == -1)
6121 return true;
6123 const ipa_argagg_value_list avl (aggvals);
6124 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6125 return v && values_equal_for_ipcp_p (v, value);
6128 /* Return true if OFFSET is minus one because the source of a polymorphic
6129 context cannot be an aggregate value. */
6131 DEBUG_FUNCTION bool
6132 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6133 int, HOST_WIDE_INT offset,
6134 ipa_polymorphic_call_context)
6136 return offset == -1;
6139 /* Decide whether to create a special version of NODE for value VAL of
6140 parameter at the given INDEX. If OFFSET is -1, the value is for the
6141 parameter itself, otherwise it is stored at the given OFFSET of the
6142 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6143 is a vector which contains clones created for self-recursive calls with an
6144 arithmetic pass-through jump function. */
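/* In outline, the decision is negative when a specialized clone for VAL
   already exists (then only new callers are redirected), when the unit
   growth budget would be exceeded, or when neither the local nor the
   propagated time benefit makes cloning look profitable; otherwise the
   edges bringing VAL are gathered and a specialized node is created for
   them.  */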
6146 template <typename valtype>
6147 static bool
6148 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6149 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6150 vec<cgraph_node *> *self_gen_clones)
6152 int caller_count;
6153 sreal freq_sum;
6154 profile_count count_sum, rec_count_sum;
6155 vec<cgraph_edge *> callers;
6157 if (val->spec_node)
6159 perhaps_add_new_callers (node, val);
6160 return false;
6162 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6164 if (dump_file && (dump_flags & TDF_DETAILS))
6165 fprintf (dump_file, " Ignoring candidate value because "
6166 "maximum unit size would be reached with %li.\n",
6167 val->local_size_cost + overall_size);
6168 return false;
6170 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6171 &rec_count_sum, &count_sum))
6172 return false;
6174 if (!dbg_cnt (ipa_cp_values))
6175 return false;
6177 if (val->self_recursion_generated_p ())
6179 /* The edge counts in this case might not have been adjusted yet.
6180 Nevertheless, even if they were, it would only be guesswork which we
6181 can do just as well now. The recursive part of the counts can be
6182 derived from the count of the original node anyway. */
6183 if (node->count.ipa ().nonzero_p ())
6185 unsigned dem = self_gen_clones->length () + 1;
6186 rec_count_sum = node->count.ipa () / dem;
6188 else
6189 rec_count_sum = profile_count::zero ();
6192 /* get_info_about_necessary_edges only sums up ipa counts. */
6193 count_sum += rec_count_sum;
6195 if (dump_file && (dump_flags & TDF_DETAILS))
6197 fprintf (dump_file, " - considering value ");
6198 print_ipcp_constant_value (dump_file, val->value);
6199 fprintf (dump_file, " for ");
6200 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6201 if (offset != -1)
6202 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6203 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6206 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6207 freq_sum, count_sum,
6208 val->local_size_cost)
6209 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6210 freq_sum, count_sum, val->prop_size_cost))
6211 return false;
6213 if (dump_file)
6214 fprintf (dump_file, " Creating a specialized node of %s.\n",
6215 node->dump_name ());
6217 vec<tree> known_csts;
6218 vec<ipa_polymorphic_call_context> known_contexts;
6220 callers = gather_edges_for_value (val, node, caller_count);
6221 if (offset == -1)
6222 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6223 else
6225 known_csts = avals->m_known_vals.copy ();
6226 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6228 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6229 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6230 vec<ipa_argagg_value, va_gc> *aggvals
6231 = find_aggregate_values_for_callers_subset (node, callers);
6232 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6233 offset, val->value));
6234 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6235 aggvals, callers);
6237 if (val->self_recursion_generated_p ())
6238 self_gen_clones->safe_push (val->spec_node);
6239 else
6240 update_profiling_info (node, val->spec_node);
6242 callers.release ();
6243 overall_size += val->local_size_cost;
6244 if (dump_file && (dump_flags & TDF_DETAILS))
6245 fprintf (dump_file, " overall size reached %li\n",
6246 overall_size);
6248 /* TODO: If for some lattice there is only one other known value
6249 left, make a special node for it too. */
6251 return true;
6254 /* Like irange::contains_p(), but convert VAL to the type of R if
6255 necessary. */
6257 static inline bool
6258 ipa_range_contains_p (const vrange &r, tree val)
6260 if (r.undefined_p ())
6261 return false;
6263 tree type = r.type ();
6264 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
6265 return false;
6267 val = fold_convert (type, val);
6268 return r.contains_p (val);
6271 /* Decide whether and what specialized clones of NODE should be created. */
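/* Candidate values are examined per parameter, for the scalar lattice,
   for each aggregate-part lattice and for the polymorphic-context
   lattice; finally, if so requested, a clone specialized for all known
   contexts is created.  */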
6273 static bool
6274 decide_whether_version_node (struct cgraph_node *node)
6276 ipa_node_params *info = ipa_node_params_sum->get (node);
6277 int i, count = ipa_get_param_count (info);
6278 bool ret = false;
6280 if (count == 0)
6281 return false;
6283 if (dump_file && (dump_flags & TDF_DETAILS))
6284 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6285 node->dump_name ());
6287 auto_vec <cgraph_node *, 9> self_gen_clones;
6288 ipa_auto_call_arg_values avals;
6289 gather_context_independent_values (info, &avals, false, NULL);
6291 for (i = 0; i < count; i++)
6293 if (!ipa_is_param_used (info, i))
6294 continue;
6296 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6297 ipcp_lattice<tree> *lat = &plats->itself;
6298 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6300 if (!lat->bottom
6301 && !avals.m_known_vals[i])
6303 ipcp_value<tree> *val;
6304 for (val = lat->values; val; val = val->next)
6306 /* If some values generated for self-recursive calls with
6307 arithmetic jump functions fall outside of the known
6308 range for the parameter, we can skip them. */
6309 if (TREE_CODE (val->value) == INTEGER_CST
6310 && !plats->m_value_range.bottom_p ()
6311 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6312 val->value))
6314 /* This can also happen if a constant present in the source
6315 code falls outside of the range of the parameter's type, so we
6316 cannot assert. */
6317 if (dump_file && (dump_flags & TDF_DETAILS))
6319 fprintf (dump_file, " - skipping%s value ",
6320 val->self_recursion_generated_p ()
6321 ? " self_recursion_generated" : "");
6322 print_ipcp_constant_value (dump_file, val->value);
6323 fprintf (dump_file, " because it is outside known "
6324 "value range.\n");
6326 continue;
6328 ret |= decide_about_value (node, i, -1, val, &avals,
6329 &self_gen_clones);
6333 if (!plats->aggs_bottom)
6335 struct ipcp_agg_lattice *aglat;
6336 ipcp_value<tree> *val;
6337 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6338 if (!aglat->bottom && aglat->values
6339 /* If the following is false, the one value has been considered
6340 for cloning for all contexts. */
6341 && (plats->aggs_contain_variable
6342 || !aglat->is_single_const ()))
6343 for (val = aglat->values; val; val = val->next)
6344 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6345 &self_gen_clones);
6348 if (!ctxlat->bottom
6349 && avals.m_known_contexts[i].useless_p ())
6351 ipcp_value<ipa_polymorphic_call_context> *val;
6352 for (val = ctxlat->values; val; val = val->next)
6353 ret |= decide_about_value (node, i, -1, val, &avals,
6354 &self_gen_clones);
6358 if (!self_gen_clones.is_empty ())
6360 self_gen_clones.safe_push (node);
6361 update_counts_for_self_gen_clones (node, self_gen_clones);
6364 if (info->do_clone_for_all_contexts)
6366 if (!dbg_cnt (ipa_cp_values))
6368 info->do_clone_for_all_contexts = false;
6369 return ret;
6372 struct cgraph_node *clone;
6373 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6375 for (int i = callers.length () - 1; i >= 0; i--)
6377 cgraph_edge *cs = callers[i];
6378 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6380 if (caller_info && caller_info->node_dead)
6381 callers.unordered_remove (i);
6384 if (!adjust_callers_for_value_intersection (callers, node))
6386 /* If the node is not called by anyone, or all its caller edges are
6387 self-recursive, the node is not really in use; there is no need to
6388 do cloning. */
6389 info->do_clone_for_all_contexts = false;
6390 return ret;
6393 if (dump_file)
6394 fprintf (dump_file, " - Creating a specialized node of %s "
6395 "for all known contexts.\n", node->dump_name ());
6397 vec<tree> known_csts = avals.m_known_vals.copy ();
6398 vec<ipa_polymorphic_call_context> known_contexts
6399 = copy_useful_known_contexts (avals.m_known_contexts);
6400 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6401 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6402 vec<ipa_argagg_value, va_gc> *aggvals
6403 = find_aggregate_values_for_callers_subset (node, callers);
6405 if (!known_contexts_useful_p (known_contexts))
6407 known_contexts.release ();
6408 known_contexts = vNULL;
6410 clone = create_specialized_node (node, known_csts, known_contexts,
6411 aggvals, callers);
6412 info->do_clone_for_all_contexts = false;
6413 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6414 ret = true;
6417 return ret;
6420 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6422 static void
6423 spread_undeadness (struct cgraph_node *node)
6425 struct cgraph_edge *cs;
6427 for (cs = node->callees; cs; cs = cs->next_callee)
6428 if (ipa_edge_within_scc (cs))
6430 struct cgraph_node *callee;
6431 class ipa_node_params *info;
6433 callee = cs->callee->function_symbol (NULL);
6434 info = ipa_node_params_sum->get (callee);
6436 if (info && info->node_dead)
6438 info->node_dead = 0;
6439 spread_undeadness (callee);
6444 /* Return true if NODE has a caller from outside of its SCC that is not
6445 dead. Worker callback for cgraph_for_node_and_aliases. */
6447 static bool
6448 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6449 void *data ATTRIBUTE_UNUSED)
6451 struct cgraph_edge *cs;
6453 for (cs = node->callers; cs; cs = cs->next_caller)
6454 if (cs->caller->thunk
6455 && cs->caller->call_for_symbol_thunks_and_aliases
6456 (has_undead_caller_from_outside_scc_p, NULL, true))
6457 return true;
6458 else if (!ipa_edge_within_scc (cs))
6460 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6461 if (!caller_info /* Unoptimized callers are like dead ones. */
6462 || !caller_info->node_dead)
6463 return true;
6465 return false;
6469 /* Identify nodes within the same SCC as NODE which are no longer needed
6470 because of new clones and will be removed as unreachable. */
6472 static void
6473 identify_dead_nodes (struct cgraph_node *node)
6475 struct cgraph_node *v;
6476 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6477 if (v->local)
6479 ipa_node_params *info = ipa_node_params_sum->get (v);
6480 if (info
6481 && !v->call_for_symbol_thunks_and_aliases
6482 (has_undead_caller_from_outside_scc_p, NULL, true))
6483 info->node_dead = 1;
6486 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6488 ipa_node_params *info = ipa_node_params_sum->get (v);
6489 if (info && !info->node_dead)
6490 spread_undeadness (v);
6493 if (dump_file && (dump_flags & TDF_DETAILS))
6495 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6496 if (ipa_node_params_sum->get (v)
6497 && ipa_node_params_sum->get (v)->node_dead)
6498 fprintf (dump_file, " Marking node as dead: %s.\n",
6499 v->dump_name ());
6503 /* The decision stage. Iterate over the topological order of call graph nodes
6504 TOPO and make specialized clones if deemed beneficial. */
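/* Nodes of each SCC are re-examined until no member gains a new clone,
   because a clone created for one member can make cloning profitable for
   another; afterwards, members made unreachable by the redirections are
   identified as dead.  */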
6506 static void
6507 ipcp_decision_stage (class ipa_topo_info *topo)
6509 int i;
6511 if (dump_file)
6512 fprintf (dump_file, "\nIPA decision stage:\n\n");
6514 for (i = topo->nnodes - 1; i >= 0; i--)
6516 struct cgraph_node *node = topo->order[i];
6517 bool change = false, iterate = true;
6519 while (iterate)
6521 struct cgraph_node *v;
6522 iterate = false;
6523 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6524 if (v->has_gimple_body_p ()
6525 && ipcp_versionable_function_p (v))
6526 iterate |= decide_whether_version_node (v);
6528 change |= iterate;
6530 if (change)
6531 identify_dead_nodes (node);
6535 /* Look up all VR and bits information that we have discovered and copy it
6536 over to the transformation summary. */
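/* When both a useful value range and useful bit information are known
   for a parameter, the bits are folded into the stored range as an
   irange_bitmask; when only bits are known, they are attached to a range
   spanning the parameter's whole type.  */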
6538 static void
6539 ipcp_store_vr_results (void)
6541 cgraph_node *node;
6543 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6545 ipa_node_params *info = ipa_node_params_sum->get (node);
6546 bool dumped_sth = false;
6547 bool found_useful_result = false;
6548 bool do_vr = true;
6549 bool do_bits = true;
6551 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6553 if (dump_file)
6554 fprintf (dump_file, "Not considering %s for VR discovery "
6555 "and propagate; -fipa-ipa-vrp: disabled.\n",
6556 node->dump_name ());
6557 do_vr = false;
6559 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6561 if (dump_file)
6562 fprintf (dump_file, "Not considering %s for ipa bitwise "
6563 "propagation ; -fipa-bit-cp: disabled.\n",
6564 node->dump_name ());
6565 do_bits = false;
6567 if (!do_bits && !do_vr)
6568 continue;
6570 if (info->ipcp_orig_node)
6571 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6572 if (!info->lattices)
6573 /* Newly expanded artificial thunks do not have lattices. */
6574 continue;
6576 unsigned count = ipa_get_param_count (info);
6577 for (unsigned i = 0; i < count; i++)
6579 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6580 if (do_vr
6581 && !plats->m_value_range.bottom_p ()
6582 && !plats->m_value_range.top_p ())
6584 found_useful_result = true;
6585 break;
6587 if (do_bits && plats->bits_lattice.constant_p ())
6589 found_useful_result = true;
6590 break;
6593 if (!found_useful_result)
6594 continue;
6596 ipcp_transformation_initialize ();
6597 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6598 vec_safe_reserve_exact (ts->m_vr, count);
6600 for (unsigned i = 0; i < count; i++)
6602 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6603 ipcp_bits_lattice *bits = NULL;
6605 if (do_bits
6606 && plats->bits_lattice.constant_p ()
6607 && dbg_cnt (ipa_cp_bits))
6608 bits = &plats->bits_lattice;
6610 if (do_vr
6611 && !plats->m_value_range.bottom_p ()
6612 && !plats->m_value_range.top_p ()
6613 && dbg_cnt (ipa_cp_vr))
6615 if (bits)
6617 Value_Range tmp = plats->m_value_range.m_vr;
6618 tree type = ipa_get_type (info, i);
6619 irange &r = as_a<irange> (tmp);
6620 irange_bitmask bm (wide_int::from (bits->get_value (),
6621 TYPE_PRECISION (type),
6622 TYPE_SIGN (type)),
6623 wide_int::from (bits->get_mask (),
6624 TYPE_PRECISION (type),
6625 TYPE_SIGN (type)));
6626 r.update_bitmask (bm);
6627 ipa_vr vr (tmp);
6628 ts->m_vr->quick_push (vr);
6630 else
6632 ipa_vr vr (plats->m_value_range.m_vr);
6633 ts->m_vr->quick_push (vr);
6636 else if (bits)
6638 tree type = ipa_get_type (info, i);
6639 Value_Range tmp;
6640 tmp.set_varying (type);
6641 irange &r = as_a<irange> (tmp);
6642 irange_bitmask bm (wide_int::from (bits->get_value (),
6643 TYPE_PRECISION (type),
6644 TYPE_SIGN (type)),
6645 wide_int::from (bits->get_mask (),
6646 TYPE_PRECISION (type),
6647 TYPE_SIGN (type)));
6648 r.update_bitmask (bm);
6649 ipa_vr vr (tmp);
6650 ts->m_vr->quick_push (vr);
6652 else
6654 ipa_vr vr;
6655 ts->m_vr->quick_push (vr);
6658 if (!dump_file || !bits)
6659 continue;
6661 if (!dumped_sth)
6663 fprintf (dump_file, "Propagated bits info for function %s:\n",
6664 node->dump_name ());
6665 dumped_sth = true;
6667 fprintf (dump_file, " param %i: value = ", i);
6668 print_hex (bits->get_value (), dump_file);
6669 fprintf (dump_file, ", mask = ");
6670 print_hex (bits->get_mask (), dump_file);
6671 fprintf (dump_file, "\n");
6676 /* The IPCP driver. */
6678 static unsigned int
6679 ipcp_driver (void)
6681 class ipa_topo_info topo;
6683 if (edge_clone_summaries == NULL)
6684 edge_clone_summaries = new edge_clone_summary_t (symtab);
6686 ipa_check_create_node_params ();
6687 ipa_check_create_edge_args ();
6688 clone_num_suffixes = new hash_map<const char *, unsigned>;
6690 if (dump_file)
6692 fprintf (dump_file, "\nIPA structures before propagation:\n");
6693 if (dump_flags & TDF_DETAILS)
6694 ipa_print_all_params (dump_file);
6695 ipa_print_all_jump_functions (dump_file);
6698 /* Topological sort. */
6699 build_toporder_info (&topo);
6700 /* Do the interprocedural propagation. */
6701 ipcp_propagate_stage (&topo);
6702 /* Decide what constant propagation and cloning should be performed. */
6703 ipcp_decision_stage (&topo);
6704 /* Store results of value range and bits propagation. */
6705 ipcp_store_vr_results ();
6707 /* Free all IPCP structures. */
6708 delete clone_num_suffixes;
6709 free_toporder_info (&topo);
6710 delete edge_clone_summaries;
6711 edge_clone_summaries = NULL;
6712 ipa_free_all_structures_after_ipa_cp ();
6713 if (dump_file)
6714 fprintf (dump_file, "\nIPA constant propagation end\n");
6715 return 0;
6718 /* Initialization and computation of IPCP data structures. This is the initial
6719 intraprocedural analysis of functions, which gathers information to be
6720 propagated later on. */
6722 static void
6723 ipcp_generate_summary (void)
6725 struct cgraph_node *node;
6727 if (dump_file)
6728 fprintf (dump_file, "\nIPA constant propagation start:\n");
6729 ipa_register_cgraph_hooks ();
6731 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6732 ipa_analyze_node (node);
6735 namespace {
6737 const pass_data pass_data_ipa_cp =
6739 IPA_PASS, /* type */
6740 "cp", /* name */
6741 OPTGROUP_NONE, /* optinfo_flags */
6742 TV_IPA_CONSTANT_PROP, /* tv_id */
6743 0, /* properties_required */
6744 0, /* properties_provided */
6745 0, /* properties_destroyed */
6746 0, /* todo_flags_start */
6747 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6750 class pass_ipa_cp : public ipa_opt_pass_d
6752 public:
6753 pass_ipa_cp (gcc::context *ctxt)
6754 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6755 ipcp_generate_summary, /* generate_summary */
6756 NULL, /* write_summary */
6757 NULL, /* read_summary */
6758 ipcp_write_transformation_summaries, /*
6759 write_optimization_summary */
6760 ipcp_read_transformation_summaries, /*
6761 read_optimization_summary */
6762 NULL, /* stmt_fixup */
6763 0, /* function_transform_todo_flags_start */
6764 ipcp_transform_function, /* function_transform */
6765 NULL) /* variable_transform */
6768 /* opt_pass methods: */
6769 bool gate (function *) final override
6771 /* FIXME: We should remove the optimize check after we ensure we never run
6772 IPA passes when not optimizing. */
6773 return (flag_ipa_cp && optimize) || in_lto_p;
6776 unsigned int execute (function *) final override { return ipcp_driver (); }
6778 }; // class pass_ipa_cp
6780 } // anon namespace
6782 ipa_opt_pass_d *
6783 make_pass_ipa_cp (gcc::context *ctxt)
6785 return new pass_ipa_cp (ctxt);
6788 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6789 within the same process. For use by toplev::finalize. */
6791 void
6792 ipa_cp_cc_finalize (void)
6794 base_count = profile_count::uninitialized ();
6795 overall_size = 0;
6796 orig_overall_size = 0;
6797 ipcp_free_transformation_sum ();
6800 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6801 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6802 DECL_UID-sorted vector if available (which is pre-computed only if there are
6803 many parameters). Can return -1 if PARAM is the static chain, which is
6804 not represented among DECL_ARGUMENTS. */
6806 int
6807 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6809 gcc_assert (TREE_CODE (param) == PARM_DECL);
6810 if (m_uid_to_idx)
6812 unsigned puid = DECL_UID (param);
6813 const ipa_uid_to_idx_map_elt *res
6814 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6815 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6817 return elt.uid < uid;
6819 if (res == m_uid_to_idx->end ()
6820 || res->uid != puid)
6822 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6823 return -1;
6825 return res->index;
6828 unsigned index = 0;
6829 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6830 if (p == param)
6831 return (int) index;
6833 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6834 return -1;
6837 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6838 according to the uid. */
6840 static int
6841 compare_uids (const void *a, const void *b)
6843 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6844 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6845 if (e1->uid < e2->uid)
6846 return -1;
6847 if (e1->uid > e2->uid)
6848 return 1;
6849 gcc_unreachable ();
6852 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6853 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6854 from parameters to their indices in the DECL_ARGUMENTS chain. */
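/* The cut-off below (32 parameters) is a heuristic: for shorter
   DECL_ARGUMENTS chains a linear walk in get_param_index is assumed to be
   cheaper than building and binary-searching the sorted vector.  */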
6856 void
6857 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6859 int c = count_formal_params (fndecl);
6860 if (c < 32)
6861 return;
6863 m_uid_to_idx = NULL;
6864 vec_safe_reserve (m_uid_to_idx, c, true);
6865 unsigned index = 0;
6866 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6868 ipa_uid_to_idx_map_elt elt;
6869 elt.uid = DECL_UID (p);
6870 elt.index = index;
6871 m_uid_to_idx->quick_push (elt);
6873 m_uid_to_idx->qsort (compare_uids);