/* Interprocedural constant propagation
   Copyright (C) 2005-2022 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, possibly with an operation performed on it.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm from the one in the original Callahan paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */
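
/* For illustration only (a hypothetical example, not taken from the
   sources): given

     static int f (int a, int b) { return a / b; }
     int g (int x) { return f (x, 4); }
     int h (int y) { return f (y, 2); }

   the first stage records a constant jump function for F's second argument
   at each call-site (4 from G, 2 from H) and a pass-through jump function
   for the first one.  The second stage may then decide to clone F once per
   context, after which later optimizations can turn the divisions by known
   constants into shifts.  */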

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call; the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two chains of self-recursive value generation, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the lowest eight) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the lowest eight bits are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic context of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, " param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, " ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, " ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, " virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, " AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, " AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that calls
   from ITSELF (if non-NULL) are counted separately as self-recursive.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls ++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
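
/* To illustrate the first heuristic above (a made-up example, not from the
   sources): a function whose estimated self_size is 40 units but which has
   50 call sites passes the self_size < n_calls test and is considered for
   cloning regardless of the profile, because redirecting even a fraction of
   the calls to a specialized clone might shrink the overall code.  */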

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
                     constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with the value range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the value range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (*other_vr);
  return m_vr != save;
}
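
/* For instance (an illustration, not taken from the sources): meeting a
   lattice currently holding the range [1, 5] with an incoming range [3, 8]
   unions the two into [1, 8] and meet_with_1 returns true because the
   lattice changed; meeting the resulting [1, 8] with [2, 4] afterwards
   changes nothing and returns false.  */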

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet: we xor the values, and where
   this->m_value and VALUE differ at the same bit position, that bit is
   dropped to varying.  Return true if the mask has changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
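
/* A small worked example (an illustration, not from the sources): meeting a
   lattice that holds the constant 5 (m_value = 0x5, m_mask = 0x0) with
   <value = 0x4, mask = 0x2> gives
   m_mask = (0x0 | 0x2) | (0x5 ^ 0x4) = 0x3 and
   m_value = 0x5 & ~0x3 = 0x4,
   i.e. only bit 2 is still known, and it is known to be one.  */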

/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}

/* Meet the bits lattice with the result of bit_value_binop (other, operand)
   if CODE is a binary operation, or of bit_value_unop (other) if CODE is a
   unary op.  In the case when CODE is NOP_EXPR, no adjustment is required.
   If DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand,
                              bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
        {
          adjusted_mask |= adjusted_value;
          adjusted_value &= ~adjusted_mask;
        }
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
                        drop_all_ones);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases.  It is supposed to be called
   on the one caller of some other node.  Set the caller's corresponding
   flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, " Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           " The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
          || !ipa_get_type (info, i)
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.m_vr = value_range ();
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init ();
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
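
/* As a concrete case (an illustration, not from the sources): for OPCODE
   PLUS_EXPR, INPUT 7 and OPERAND 4, the fold_binary call above produces the
   integer constant 11.  For a comparison OPCODE with no RES_TYPE supplied,
   the result type falls back to boolean_type_node as per the code above.  */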

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
           && zerop (input))
    return input;
  else
    return NULL_TREE;
}
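
/* E.g. (a made-up case): for INPUT &s and an ancestor jump function with a
   32-bit offset, exact_div (32, BITS_PER_UNIT) gives 4 and the result is
   the address of the location four bytes into *&s, built as an ADDR_EXPR of
   a MEM_REF.  When the keep_null flag is set, a zero INPUT is passed
   through unchanged.  */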

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
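
/* For instance (hypothetical): if JFUNC is a pass-through with operation
   MULT_EXPR and operand 2, referring to formal parameter 0 of the caller,
   and that parameter's scalar lattice holds the single constant 3, this
   returns the constant 6.  If the lattice also contains a variable
   component, NULL_TREE is returned instead.  */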

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
                                   value_range *src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}
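
/* E.g. (an illustration, not from the sources): folding a NOP_EXPR
   conversion of the signed int range [0, 10] to unsigned char yields
   [0, 10] in the destination type and the function returns true; a
   conversion whose folded result degenerates to VARYING or UNDEFINED makes
   it return false.  */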

/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
                            ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
                                       jfunc->m_vr,
                                       NOP_EXPR, parm_type,
                                       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
        = ipcp_get_transformation_summary (cs->caller->inlined_to
                                           ? cs->caller->inlined_to
                                           : cs->caller);
      if (!sum || !sum->m_vr)
        return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
        return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
                         wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
                         (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
        {
          value_range res;

          if (ipa_vr_operation_and_type_effects (&res,
                                                 &srcvr,
                                                 operation, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
      else
        {
          value_range op_res, res;
          tree op = ipa_get_jf_pass_through_operand (jfunc);
          value_range op_vr (op, op);

          range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
          if (ipa_vr_operation_and_type_effects (&res,
                                                 &op_res,
                                                 NOP_EXPR, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
    }
  return vr;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
                     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
          && aggval->index == index)
        return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}

/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
                         struct cgraph_node *node,
                         struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
                       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
        value = info->known_csts[src_idx];
      else
        value = get_clone_agg_value (node, item->value.load_agg.offset,
                                     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
        = ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
        {
          struct ipcp_lattice<tree> *lat = &src_plats->itself;

          if (!lat->is_single_const ())
            return NULL_TREE;

          value = lat->values->value;
        }
      else if (src_plats->aggs
               && !src_plats->aggs_bottom
               && !src_plats->aggs_contain_variable
               && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
        {
          struct ipcp_agg_lattice *aglat;

          for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
            {
              if (aglat->offset > item->value.load_agg.offset)
                break;

              if (aglat->offset == item->value.load_agg.offset)
                {
                  if (aglat->is_single_const ())
                    value = aglat->values->value;
                  break;
                }
            }
        }
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
        return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
                                  value,
                                  item->value.pass_through.operand,
                                  item->type);
}

/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
                              struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
        {
          struct ipa_agg_value value_item;

          value_item.offset = item->offset;
          value_item.value = value;

          agg.items.safe_push (value_item);
        }
    }
  return agg;
}

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

DEBUG_FUNCTION void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
          || !opt_for_fn (node->decl, optimize))
        continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
        {
          ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

          if (!lat->bottom
              && !lat->contains_variable
              && lat->values_count == 0)
            {
              if (dump_file)
                {
                  symtab->dump (dump_file);
                  fprintf (dump_file, "\nIPA lattices after constant "
                           "propagation, before gcc_unreachable:\n");
                  print_all_lattices (dump_file, true, false);
                }

              gcc_unreachable ();
            }
        }
    }
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
                         ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}

/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset within an
   aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
                                 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}
1916 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1917 CST, its self-recursion level to SAME_LAT_GEN_LEVEL and clear all other fields. */
1919 static ipcp_value<tree> *
1920 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1922 ipcp_value<tree> *val;
1924 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1925 val->value = cst;
1926 val->self_recursion_generated_level = same_lat_gen_level;
1927 return val;
1930 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1931 value to CTX, its self-recursion level to SAME_LAT_GEN_LEVEL and clear all other fields. */
1933 static ipcp_value<ipa_polymorphic_call_context> *
1934 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1935 unsigned same_lat_gen_level)
1937 ipcp_value<ipa_polymorphic_call_context> *val;
1939 val = new (ipcp_poly_ctx_values_pool.allocate ())
1940 ipcp_value<ipa_polymorphic_call_context>();
1941 val->value = ctx;
1942 val->self_recursion_generated_level = same_lat_gen_level;
1943 return val;
1946 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1947 SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
1948 meaning. OFFSET -1 means the source is scalar and not a part of an
1949 aggregate. If non-NULL, VAL_P records address of existing or newly added
1950 ipcp_value.
1952 If the value is generated for a self-recursive call as a result of an
1953 arithmetic pass-through jump-function acting on a value in the same lattice,
1954 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1955 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1957 template <typename valtype>
1958 bool
1959 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1960 ipcp_value<valtype> *src_val,
1961 int src_idx, HOST_WIDE_INT offset,
1962 ipcp_value<valtype> **val_p,
1963 unsigned same_lat_gen_level)
1965 ipcp_value<valtype> *val, *last_val = NULL;
1967 if (val_p)
1968 *val_p = NULL;
1970 if (bottom)
1971 return false;
1973 for (val = values; val; last_val = val, val = val->next)
1974 if (values_equal_for_ipcp_p (val->value, newval))
1976 if (val_p)
1977 *val_p = val;
1979 if (val->self_recursion_generated_level < same_lat_gen_level)
1980 val->self_recursion_generated_level = same_lat_gen_level;
1982 if (ipa_edge_within_scc (cs))
1984 ipcp_value_source<valtype> *s;
1985 for (s = val->sources; s; s = s->next)
1986 if (s->cs == cs && s->val == src_val)
1987 break;
1988 if (s)
1989 return false;
1992 val->add_source (cs, src_val, src_idx, offset);
1993 return false;
1996 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1997 param_ipa_cp_value_list_size))
1999 /* We can only free sources, not the values themselves, because sources
2000 of other values in this SCC might point to them. */
2001 for (val = values; val; val = val->next)
2003 while (val->sources)
2005 ipcp_value_source<valtype> *src = val->sources;
2006 val->sources = src->next;
2007 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2010 values = NULL;
2011 return set_to_bottom ();
2014 values_count++;
2015 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2016 val->add_source (cs, src_val, src_idx, offset);
2017 val->next = NULL;
2019 /* Add the new value to the end of the value list, which can reduce the
2020 number of propagation iterations for recursive functions. */
2021 if (last_val)
2022 last_val->next = val;
2023 else
2024 values = val;
2026 if (val_p)
2027 *val_p = val;
2029 return true;
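/* Note that the number of distinct values tracked per lattice is capped by
   the --param ipa-cp-value-list-size knob read above (8 by default at the
   time of writing); once the cap is hit by a non-self-recursively-generated
   value, the whole lattice conservatively drops to bottom.  */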
2032 /* A helper function that returns the result of the operation specified by
2033 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
2034 type for the value of SRC_VAL. If the operation is binary, OPND2 is a
2035 constant value acting as its second operand. If non-NULL, RES_TYPE is the
2036 expected type of the result. */
2038 static tree
2039 get_val_across_arith_op (enum tree_code opcode,
2040 tree opnd1_type,
2041 tree opnd2,
2042 ipcp_value<tree> *src_val,
2043 tree res_type)
2045 tree opnd1 = src_val->value;
2047 /* Skip source values that are incompatible with the specified type. */
2048 if (opnd1_type
2049 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2050 return NULL_TREE;
2052 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2055 /* Propagate values through an arithmetic transformation described by a jump
2056 function associated with edge CS, taking values from SRC_LAT and putting
2057 them into DEST_LAT. OPND1_TYPE is the expected type for the values in SRC_LAT.
2058 OPND2 is a constant value if the transformation is a binary operation.
2059 SRC_OFFSET specifies the offset in an aggregate if SRC_LAT describes the lattice of
2060 a part of the aggregate. SRC_IDX is the index of the source parameter.
2061 RES_TYPE is the value type of result being propagated into. Return true if
2062 DEST_LAT changed. */
2064 static bool
2065 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2066 enum tree_code opcode,
2067 tree opnd1_type,
2068 tree opnd2,
2069 ipcp_lattice<tree> *src_lat,
2070 ipcp_lattice<tree> *dest_lat,
2071 HOST_WIDE_INT src_offset,
2072 int src_idx,
2073 tree res_type)
2075 ipcp_value<tree> *src_val;
2076 bool ret = false;
2078 /* Due to circular dependencies, propagating within an SCC through arithmetic
2079 transformation would create an infinite number of values. But for a
2080 self-feeding recursive function, we can allow propagation up to a limited
2081 count, which enables a simple kind of recursive function versioning.
2082 In other scenarios, we simply make the lattices bottom. */
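/* As a sketch of the self-feeding case (hypothetical user code, not from
   any particular input), consider:

     int fact (int n) { return n <= 1 ? 1 : n * fact (n - 1); }

   The recursive call passes n - 1, an arithmetic pass-through into the very
   lattice it reads from.  The limited propagation below can then generate
   the chain N, N - 1, N - 2, ... from a known outside call such as
   fact (5), enabling one clone per recursion level up to
   --param ipa-cp-max-recursive-depth.  */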
2083 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2085 int i;
2087 int max_recursive_depth = opt_for_fn (cs->caller->decl,
2088 param_ipa_cp_max_recursive_depth);
2089 if (src_lat != dest_lat || max_recursive_depth < 1)
2090 return dest_lat->set_contains_variable ();
2092 /* No benefit if the probability of recursive execution is low. */
2093 if (cs->sreal_frequency () * 100
2094 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2095 param_ipa_cp_min_recursive_probability))
2096 return dest_lat->set_contains_variable ();
2098 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2100 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2102 /* We currently do not use self-recursively generated values as
2103 propagation sources. This is conservative but avoids an explosion
2104 of the lattice's value space, especially when one recursive function
2105 calls another recursive one. */
2106 if (src_val->self_recursion_generated_p ())
2108 ipcp_value_source<tree> *s;
2110 /* If the lattice has already been propagated for the call site,
2111 no need to do that again. */
2112 for (s = src_val->sources; s; s = s->next)
2113 if (s->cs == cs)
2114 return dest_lat->set_contains_variable ();
2116 else
2117 val_seeds.safe_push (src_val);
2120 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2122 /* Recursively generate lattice values with a limited count. */
2123 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2125 for (int j = 1; j < max_recursive_depth; j++)
2127 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2128 src_val, res_type);
2129 if (!cstval
2130 || !ipacp_value_safe_for_type (res_type, cstval))
2131 break;
2133 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2134 src_offset, &src_val, j);
2135 gcc_checking_assert (src_val);
2138 ret |= dest_lat->set_contains_variable ();
2140 else
2141 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2143 /* We do not use self-recursively generated values as propagation
2144 sources here either; otherwise the value space of a normal lattice
2145 could easily overflow. */
2146 if (src_val->self_recursion_generated_p ())
2148 ret |= dest_lat->set_contains_variable ();
2149 continue;
2152 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2153 src_val, res_type);
2154 if (cstval
2155 && ipacp_value_safe_for_type (res_type, cstval))
2156 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2157 src_offset);
2158 else
2159 ret |= dest_lat->set_contains_variable ();
2162 return ret;
2165 /* Propagate values through a pass-through jump function JFUNC associated with
2166 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2167 is the index of the source parameter. PARM_TYPE is the type of the
2168 parameter to which the result is passed. */
2170 static bool
2171 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2172 ipcp_lattice<tree> *src_lat,
2173 ipcp_lattice<tree> *dest_lat, int src_idx,
2174 tree parm_type)
2176 return propagate_vals_across_arith_jfunc (cs,
2177 ipa_get_jf_pass_through_operation (jfunc),
2178 NULL_TREE,
2179 ipa_get_jf_pass_through_operand (jfunc),
2180 src_lat, dest_lat, -1, src_idx, parm_type);
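/* For example (a sketch, not from any particular input), in

     static void g (int y) { use (y); }
     void f (int x) { g (x + 3); }

   the argument of g is a pass-through jump function with operation
   PLUS_EXPR and operand 3, so every constant known for x in f's lattice
   contributes the corresponding value of x + 3 to the lattice of g's
   parameter.  */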
2183 /* Propagate values through an ancestor jump function JFUNC associated with
2184 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2185 is the index of the source parameter. */
2187 static bool
2188 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2189 struct ipa_jump_func *jfunc,
2190 ipcp_lattice<tree> *src_lat,
2191 ipcp_lattice<tree> *dest_lat, int src_idx,
2192 tree param_type)
2194 ipcp_value<tree> *src_val;
2195 bool ret = false;
2197 if (ipa_edge_within_scc (cs))
2198 return dest_lat->set_contains_variable ();
2200 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2202 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2204 if (t && ipacp_value_safe_for_type (param_type, t))
2205 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2206 else
2207 ret |= dest_lat->set_contains_variable ();
2210 return ret;
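/* Ancestor jump functions typically come from converting a pointer to a
   derived class into a pointer to one of its bases, e.g. (hypothetical C++
   with multiple inheritance):

     struct A { int a; };  struct B { int b; };  struct C : A, B { };
     void f (C *c) { g (static_cast<B *> (c)); }

   where the argument of g is c adjusted by the offset of B within C;
   ipa_get_jf_ancestor_result applies that adjustment to each known value
   of c above.  */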
2213 /* Propagate scalar values across jump function JFUNC that is associated with
2214 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2215 parameter to which the result is passed. */
2217 static bool
2218 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2219 struct ipa_jump_func *jfunc,
2220 ipcp_lattice<tree> *dest_lat,
2221 tree param_type)
2223 if (dest_lat->bottom)
2224 return false;
2226 if (jfunc->type == IPA_JF_CONST)
2228 tree val = ipa_get_jf_constant (jfunc);
2229 if (ipacp_value_safe_for_type (param_type, val))
2230 return dest_lat->add_value (val, cs, NULL, 0);
2231 else
2232 return dest_lat->set_contains_variable ();
2234 else if (jfunc->type == IPA_JF_PASS_THROUGH
2235 || jfunc->type == IPA_JF_ANCESTOR)
2237 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2238 ipcp_lattice<tree> *src_lat;
2239 int src_idx;
2240 bool ret;
2242 if (jfunc->type == IPA_JF_PASS_THROUGH)
2243 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2244 else
2245 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2247 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2248 if (src_lat->bottom)
2249 return dest_lat->set_contains_variable ();
2251 /* If we would need to clone the caller and cannot, do not propagate. */
2252 if (!ipcp_versionable_function_p (cs->caller)
2253 && (src_lat->contains_variable
2254 || (src_lat->values_count > 1)))
2255 return dest_lat->set_contains_variable ();
2257 if (jfunc->type == IPA_JF_PASS_THROUGH)
2258 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2259 dest_lat, src_idx,
2260 param_type);
2261 else
2262 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2263 src_idx, param_type);
2265 if (src_lat->contains_variable)
2266 ret |= dest_lat->set_contains_variable ();
2268 return ret;
2271 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2272 use them for indirect inlining); we should propagate them too. */
2273 return dest_lat->set_contains_variable ();
2276 /* Propagate polymorphic call contexts across jump function JFUNC that is
2277 associated with edge CS and describes argument IDX, and put the values into DEST_LAT. */
2279 static bool
2280 propagate_context_across_jump_function (cgraph_edge *cs,
2281 ipa_jump_func *jfunc, int idx,
2282 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2284 if (dest_lat->bottom)
2285 return false;
2286 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2287 bool ret = false;
2288 bool added_sth = false;
2289 bool type_preserved = true;
2291 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2292 = ipa_get_ith_polymorhic_call_context (args, idx);
2294 if (edge_ctx_ptr)
2295 edge_ctx = *edge_ctx_ptr;
2297 if (jfunc->type == IPA_JF_PASS_THROUGH
2298 || jfunc->type == IPA_JF_ANCESTOR)
2300 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2301 int src_idx;
2302 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2304 /* TODO: Once we figure out how to propagate speculations, it will
2305 probably be a good idea to switch to speculation if type_preserved is
2306 not set instead of punting. */
2307 if (jfunc->type == IPA_JF_PASS_THROUGH)
2309 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2310 goto prop_fail;
2311 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2312 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2314 else
2316 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2317 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2320 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2321 /* If we would need to clone the caller and cannot, do not propagate. */
2322 if (!ipcp_versionable_function_p (cs->caller)
2323 && (src_lat->contains_variable
2324 || (src_lat->values_count > 1)))
2325 goto prop_fail;
2327 ipcp_value<ipa_polymorphic_call_context> *src_val;
2328 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2330 ipa_polymorphic_call_context cur = src_val->value;
2332 if (!type_preserved)
2333 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2334 if (jfunc->type == IPA_JF_ANCESTOR)
2335 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2336 /* TODO: In cases we know how the context is going to be used,
2337 we can improve the result by passing proper OTR_TYPE. */
2338 cur.combine_with (edge_ctx);
2339 if (!cur.useless_p ())
2341 if (src_lat->contains_variable
2342 && !edge_ctx.equal_to (cur))
2343 ret |= dest_lat->set_contains_variable ();
2344 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2345 added_sth = true;
2350 prop_fail:
2351 if (!added_sth)
2353 if (!edge_ctx.useless_p ())
2354 ret |= dest_lat->add_value (edge_ctx, cs);
2355 else
2356 ret |= dest_lat->set_contains_variable ();
2359 return ret;
2362 /* Propagate bits across jfunc that is associated with
2363 edge cs and update dest_lattice accordingly. */
2365 bool
2366 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2367 ipa_jump_func *jfunc,
2368 ipcp_bits_lattice *dest_lattice)
2370 if (dest_lattice->bottom_p ())
2371 return false;
2373 enum availability availability;
2374 cgraph_node *callee = cs->callee->function_symbol (&availability);
2375 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2376 tree parm_type = ipa_get_type (callee_info, idx);
2378 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2379 transform for these cases. Similarly, we can have bad type mismatches
2380 with LTO, avoid doing anything with those too. */
2381 if (!parm_type
2382 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2384 if (dump_file && (dump_flags & TDF_DETAILS))
2385 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2386 "param %i of %s is NULL or unsuitable for bits propagation\n",
2387 idx, cs->callee->dump_name ());
2389 return dest_lattice->set_to_bottom ();
2392 unsigned precision = TYPE_PRECISION (parm_type);
2393 signop sgn = TYPE_SIGN (parm_type);
2395 if (jfunc->type == IPA_JF_PASS_THROUGH
2396 || jfunc->type == IPA_JF_ANCESTOR)
2398 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2399 tree operand = NULL_TREE;
2400 enum tree_code code;
2401 unsigned src_idx;
2402 bool keep_null = false;
2404 if (jfunc->type == IPA_JF_PASS_THROUGH)
2406 code = ipa_get_jf_pass_through_operation (jfunc);
2407 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2408 if (code != NOP_EXPR)
2409 operand = ipa_get_jf_pass_through_operand (jfunc);
2411 else
2413 code = POINTER_PLUS_EXPR;
2414 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2415 unsigned HOST_WIDE_INT offset
2416 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2417 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2418 operand = build_int_cstu (size_type_node, offset);
2421 class ipcp_param_lattices *src_lats
2422 = ipa_get_parm_lattices (caller_info, src_idx);
2424 /* Try to propagate bits even if src_lattice is bottom, as long as jfunc
2425 is known. For example, consider:
2426 int f(int x)
2428 g (x & 0xff);
2430 Even if the lattice for x is bottom, we can still propagate that the
2431 result of x & 0xff has all higher bits zero; this gets computed during
2432 the ccp1 pass and is stored in the jump function during the analysis stage. */
2434 if (!src_lats->bits_lattice.bottom_p ())
2436 bool drop_all_ones
2437 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2439 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2440 sgn, code, operand, drop_all_ones);
2444 if (jfunc->bits)
2445 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2446 precision);
2447 else
2448 return dest_lattice->set_to_bottom ();
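/* The bits lattice uses the same value/mask encoding as tree-ssa-ccp: a
   mask bit set to 1 means the corresponding bit is unknown, a cleared mask
   bit means it is known to equal the corresponding bit of value.  In the
   x & 0xff example above, the jump function would record mask 0xff and
   value 0, i.e. only the low eight bits are unknown and all higher bits
   are known to be zero.  */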
2451 /* Propagate value range across jump function JFUNC that is associated with
2452 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2453 accordingly. */
2455 static bool
2456 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2457 class ipcp_param_lattices *dest_plats,
2458 tree param_type)
2460 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2462 if (dest_lat->bottom_p ())
2463 return false;
2465 if (!param_type
2466 || (!INTEGRAL_TYPE_P (param_type)
2467 && !POINTER_TYPE_P (param_type)))
2468 return dest_lat->set_to_bottom ();
2470 if (jfunc->type == IPA_JF_PASS_THROUGH)
2472 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2473 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2474 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2475 class ipcp_param_lattices *src_lats
2476 = ipa_get_parm_lattices (caller_info, src_idx);
2477 tree operand_type = ipa_get_type (caller_info, src_idx);
2479 if (src_lats->m_value_range.bottom_p ())
2480 return dest_lat->set_to_bottom ();
2482 value_range vr;
2483 if (TREE_CODE_CLASS (operation) == tcc_unary)
2484 ipa_vr_operation_and_type_effects (&vr,
2485 &src_lats->m_value_range.m_vr,
2486 operation, param_type,
2487 operand_type);
2488 /* A crude way to prevent unbounded number of value range updates
2489 in SCC components. We should allow limited number of updates within
2490 SCC, too. */
2491 else if (!ipa_edge_within_scc (cs))
2493 tree op = ipa_get_jf_pass_through_operand (jfunc);
2494 value_range op_vr (op, op);
2495 value_range op_res, res;
2497 range_fold_binary_expr (&op_res, operation, operand_type,
2498 &src_lats->m_value_range.m_vr, &op_vr);
2499 ipa_vr_operation_and_type_effects (&vr,
2500 &op_res,
2501 NOP_EXPR, param_type,
2502 operand_type);
2504 if (!vr.undefined_p () && !vr.varying_p ())
2506 if (jfunc->m_vr)
2508 value_range jvr;
2509 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2510 NOP_EXPR,
2511 param_type,
2512 jfunc->m_vr->type ()))
2513 vr.intersect (jvr);
2515 return dest_lat->meet_with (&vr);
2518 else if (jfunc->type == IPA_JF_CONST)
2520 tree val = ipa_get_jf_constant (jfunc);
2521 if (TREE_CODE (val) == INTEGER_CST)
2523 val = fold_convert (param_type, val);
2524 if (TREE_OVERFLOW_P (val))
2525 val = drop_tree_overflow (val);
2527 value_range tmpvr (val, val);
2528 return dest_lat->meet_with (&tmpvr);
2532 value_range vr;
2533 if (jfunc->m_vr
2534 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2535 param_type,
2536 jfunc->m_vr->type ()))
2537 return dest_lat->meet_with (&vr);
2538 else
2539 return dest_lat->set_to_bottom ();
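/* A small worked example (a sketch): if the caller knows x lies in [0, 15]
   and the jump function is the pass-through x + 1, the folding above yields
   [1, 16] for the callee's parameter, which is then intersected with any
   range recorded in the jump function itself before being met into the
   destination lattice.  */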
2542 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2543 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2544 other cases, return false). If there are no aggregate items, set
2545 aggs_by_ref to NEW_AGGS_BY_REF. */
2547 static bool
2548 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2549 bool new_aggs_by_ref)
2551 if (dest_plats->aggs)
2553 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2555 set_agg_lats_to_bottom (dest_plats);
2556 return true;
2559 else
2560 dest_plats->aggs_by_ref = new_aggs_by_ref;
2561 return false;
2564 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until **AGLAT is an
2565 already existing lattice for the given OFFSET and SIZE, marking all skipped
2566 lattices as containing variable and checking for overlaps. If there is no
2567 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2568 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2569 unless there are too many already. If there are too many, return false. If
2570 there are overlaps, turn the whole DEST_PLATS to bottom and return false. If any
2571 skipped lattices were newly marked as containing variable, set *CHANGE to
2572 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2574 static bool
2575 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2576 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2577 struct ipcp_agg_lattice ***aglat,
2578 bool pre_existing, bool *change, int max_agg_items)
2580 gcc_checking_assert (offset >= 0);
2582 while (**aglat && (**aglat)->offset < offset)
2584 if ((**aglat)->offset + (**aglat)->size > offset)
2586 set_agg_lats_to_bottom (dest_plats);
2587 return false;
2589 *change |= (**aglat)->set_contains_variable ();
2590 *aglat = &(**aglat)->next;
2593 if (**aglat && (**aglat)->offset == offset)
2595 if ((**aglat)->size != val_size)
2597 set_agg_lats_to_bottom (dest_plats);
2598 return false;
2600 gcc_assert (!(**aglat)->next
2601 || (**aglat)->next->offset >= offset + val_size);
2602 return true;
2604 else
2606 struct ipcp_agg_lattice *new_al;
2608 if (**aglat && (**aglat)->offset < offset + val_size)
2610 set_agg_lats_to_bottom (dest_plats);
2611 return false;
2613 if (dest_plats->aggs_count == max_agg_items)
2614 return false;
2615 dest_plats->aggs_count++;
2616 new_al = ipcp_agg_lattice_pool.allocate ();
2617 memset (new_al, 0, sizeof (*new_al));
2619 new_al->offset = offset;
2620 new_al->size = val_size;
2621 new_al->contains_variable = pre_existing;
2623 new_al->next = **aglat;
2624 **aglat = new_al;
2625 return true;
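/* The aggregate lattices of a parameter thus stay sorted by offset and
   non-overlapping; e.g. after merging two 32-bit items at offsets 0 and 64
   the list looks like:

     dest_plats->aggs -> {offset 0, size 32} -> {offset 64, size 32} -> NULL

   A newly merged item that straddles an existing lattice sends all the
   aggregate lattices of the parameter to bottom above.  */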
2629 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2630 containing an unknown value. */
2632 static bool
2633 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2635 bool ret = false;
2636 while (aglat)
2638 ret |= aglat->set_contains_variable ();
2639 aglat = aglat->next;
2641 return ret;
2644 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2645 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2646 parameter used for lattice value sources. Return true if DEST_PLATS changed
2647 in any way. */
2649 static bool
2650 merge_aggregate_lattices (struct cgraph_edge *cs,
2651 class ipcp_param_lattices *dest_plats,
2652 class ipcp_param_lattices *src_plats,
2653 int src_idx, HOST_WIDE_INT offset_delta)
2655 bool pre_existing = dest_plats->aggs != NULL;
2656 struct ipcp_agg_lattice **dst_aglat;
2657 bool ret = false;
2659 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2660 return true;
2661 if (src_plats->aggs_bottom)
2662 return set_agg_lats_contain_variable (dest_plats);
2663 if (src_plats->aggs_contain_variable)
2664 ret |= set_agg_lats_contain_variable (dest_plats);
2665 dst_aglat = &dest_plats->aggs;
2667 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2668 param_ipa_max_agg_items);
2669 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2670 src_aglat;
2671 src_aglat = src_aglat->next)
2673 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2675 if (new_offset < 0)
2676 continue;
2677 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2678 &dst_aglat, pre_existing, &ret, max_agg_items))
2680 struct ipcp_agg_lattice *new_al = *dst_aglat;
2682 dst_aglat = &(*dst_aglat)->next;
2683 if (src_aglat->bottom)
2685 ret |= new_al->set_contains_variable ();
2686 continue;
2688 if (src_aglat->contains_variable)
2689 ret |= new_al->set_contains_variable ();
2690 for (ipcp_value<tree> *val = src_aglat->values;
2691 val;
2692 val = val->next)
2693 ret |= new_al->add_value (val->value, cs, val, src_idx,
2694 src_aglat->offset);
2696 else if (dest_plats->aggs_bottom)
2697 return true;
2699 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2700 return ret;
2703 /* Determine whether there is anything to propagate from SRC_PLATS through a
2704 pass-through JFUNC and if so, whether it conforms to the
2705 rules about propagating values passed by reference. */
2707 static bool
2708 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2709 struct ipa_jump_func *jfunc)
2711 return src_plats->aggs
2712 && (!src_plats->aggs_by_ref
2713 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2716 /* Propagate values through ITEM, jump function for a part of an aggregate,
2717 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2718 associated with the jump function. Return true if AGLAT changed in any
2719 way. */
2721 static bool
2722 propagate_aggregate_lattice (struct cgraph_edge *cs,
2723 struct ipa_agg_jf_item *item,
2724 struct ipcp_agg_lattice *aglat)
2726 class ipa_node_params *caller_info;
2727 class ipcp_param_lattices *src_plats;
2728 struct ipcp_lattice<tree> *src_lat;
2729 HOST_WIDE_INT src_offset;
2730 int src_idx;
2731 tree load_type;
2732 bool ret;
2734 if (item->jftype == IPA_JF_CONST)
2736 tree value = item->value.constant;
2738 gcc_checking_assert (is_gimple_ip_invariant (value));
2739 return aglat->add_value (value, cs, NULL, 0);
2742 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2743 || item->jftype == IPA_JF_LOAD_AGG);
2745 caller_info = ipa_node_params_sum->get (cs->caller);
2746 src_idx = item->value.pass_through.formal_id;
2747 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2749 if (item->jftype == IPA_JF_PASS_THROUGH)
2751 load_type = NULL_TREE;
2752 src_lat = &src_plats->itself;
2753 src_offset = -1;
2755 else
2757 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2758 struct ipcp_agg_lattice *src_aglat;
2760 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2761 if (src_aglat->offset >= load_offset)
2762 break;
2764 load_type = item->value.load_agg.type;
2765 if (!src_aglat
2766 || src_aglat->offset > load_offset
2767 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2768 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2769 return aglat->set_contains_variable ();
2771 src_lat = src_aglat;
2772 src_offset = load_offset;
2775 if (src_lat->bottom
2776 || (!ipcp_versionable_function_p (cs->caller)
2777 && !src_lat->is_single_const ()))
2778 return aglat->set_contains_variable ();
2780 ret = propagate_vals_across_arith_jfunc (cs,
2781 item->value.pass_through.operation,
2782 load_type,
2783 item->value.pass_through.operand,
2784 src_lat, aglat,
2785 src_offset,
2786 src_idx,
2787 item->type);
2789 if (src_lat->contains_variable)
2790 ret |= aglat->set_contains_variable ();
2792 return ret;
2795 /* Propagate aggregate values across jump function JFUNC that is associated
2796 with edge CS and put the values into DEST_PLATS. */
2798 static bool
2799 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2800 struct ipa_jump_func *jfunc,
2801 class ipcp_param_lattices *dest_plats)
2803 bool ret = false;
2805 if (dest_plats->aggs_bottom)
2806 return false;
2808 if (jfunc->type == IPA_JF_PASS_THROUGH
2809 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2811 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2812 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2813 class ipcp_param_lattices *src_plats;
2815 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2816 if (agg_pass_through_permissible_p (src_plats, jfunc))
2818 /* Currently we do not produce clobber aggregate jump
2819 functions, replace with merging when we do. */
2820 gcc_assert (!jfunc->agg.items);
2821 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2822 src_idx, 0);
2823 return ret;
2826 else if (jfunc->type == IPA_JF_ANCESTOR
2827 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2829 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2830 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2831 class ipcp_param_lattices *src_plats;
2833 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2834 if (src_plats->aggs && src_plats->aggs_by_ref)
2836 /* Currently we do not produce clobber aggregate jump
2837 functions, replace with merging when we do. */
2838 gcc_assert (!jfunc->agg.items);
2839 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2840 ipa_get_jf_ancestor_offset (jfunc));
2842 else if (!src_plats->aggs_by_ref)
2843 ret |= set_agg_lats_to_bottom (dest_plats);
2844 else
2845 ret |= set_agg_lats_contain_variable (dest_plats);
2846 return ret;
2849 if (jfunc->agg.items)
2851 bool pre_existing = dest_plats->aggs != NULL;
2852 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2853 struct ipa_agg_jf_item *item;
2854 int i;
2856 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2857 return true;
2859 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2860 param_ipa_max_agg_items);
2861 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2863 HOST_WIDE_INT val_size;
2865 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2866 continue;
2867 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2869 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2870 &aglat, pre_existing, &ret, max_agg_items))
2872 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2873 aglat = &(*aglat)->next;
2875 else if (dest_plats->aggs_bottom)
2876 return true;
2879 ret |= set_chain_of_aglats_contains_variable (*aglat);
2881 else
2882 ret |= set_agg_lats_contain_variable (dest_plats);
2884 return ret;
2887 /* Return true if, on the way from CS->caller to the final (non-alias and
2888 non-thunk) destination, the call passes through a thunk. */
2890 static bool
2891 call_passes_through_thunk (cgraph_edge *cs)
2893 cgraph_node *alias_or_thunk = cs->callee;
2894 while (alias_or_thunk->alias)
2895 alias_or_thunk = alias_or_thunk->get_alias_target ();
2896 return alias_or_thunk->thunk;
2899 /* Propagate constants from the caller to the callee of CS. Return true if
2900 any lattice of the callee changed. */
2902 static bool
2903 propagate_constants_across_call (struct cgraph_edge *cs)
2905 class ipa_node_params *callee_info;
2906 enum availability availability;
2907 cgraph_node *callee;
2908 class ipa_edge_args *args;
2909 bool ret = false;
2910 int i, args_count, parms_count;
2912 callee = cs->callee->function_symbol (&availability);
2913 if (!callee->definition)
2914 return false;
2915 gcc_checking_assert (callee->has_gimple_body_p ());
2916 callee_info = ipa_node_params_sum->get (callee);
2917 if (!callee_info)
2918 return false;
2920 args = ipa_edge_args_sum->get (cs);
2921 parms_count = ipa_get_param_count (callee_info);
2922 if (parms_count == 0)
2923 return false;
2924 if (!args
2925 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2926 || !opt_for_fn (cs->caller->decl, optimize))
2928 for (i = 0; i < parms_count; i++)
2929 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2930 i));
2931 return ret;
2933 args_count = ipa_get_cs_argument_count (args);
2935 /* If this call goes through a thunk we must not propagate to the first (0th)
2936 parameter. However, we might need to uncover a thunk from below a series
2937 of aliases first. */
2938 if (call_passes_through_thunk (cs))
2940 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2941 0));
2942 i = 1;
2944 else
2945 i = 0;
2947 for (; (i < args_count) && (i < parms_count); i++)
2949 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2950 class ipcp_param_lattices *dest_plats;
2951 tree param_type = ipa_get_type (callee_info, i);
2953 dest_plats = ipa_get_parm_lattices (callee_info, i);
2954 if (availability == AVAIL_INTERPOSABLE)
2955 ret |= set_all_contains_variable (dest_plats);
2956 else
2958 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2959 &dest_plats->itself,
2960 param_type);
2961 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2962 &dest_plats->ctxlat);
2964 ret |= propagate_bits_across_jump_function (cs, i, jump_func,
2965 &dest_plats->bits_lattice);
2966 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2967 dest_plats);
2968 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2969 ret |= propagate_vr_across_jump_function (cs, jump_func,
2970 dest_plats, param_type);
2971 else
2972 ret |= dest_plats->m_value_range.set_to_bottom ();
2975 for (; i < parms_count; i++)
2976 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2978 return ret;
2981 /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
2982 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination. The latter
2983 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2985 static tree
2986 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2987 const vec<tree> &known_csts,
2988 const vec<ipa_polymorphic_call_context> &known_contexts,
2989 const vec<ipa_agg_value_set> &known_aggs,
2990 struct ipa_agg_replacement_value *agg_reps,
2991 bool *speculative)
2993 int param_index = ie->indirect_info->param_index;
2994 HOST_WIDE_INT anc_offset;
2995 tree t = NULL;
2996 tree target = NULL;
2998 *speculative = false;
3000 if (param_index == -1)
3001 return NULL_TREE;
3003 if (!ie->indirect_info->polymorphic)
3005 tree t = NULL;
3007 if (ie->indirect_info->agg_contents)
3009 t = NULL;
3010 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
3012 while (agg_reps)
3014 if (agg_reps->index == param_index
3015 && agg_reps->offset == ie->indirect_info->offset
3016 && agg_reps->by_ref == ie->indirect_info->by_ref)
3018 t = agg_reps->value;
3019 break;
3021 agg_reps = agg_reps->next;
3024 if (!t)
3026 const ipa_agg_value_set *agg;
3027 if (known_aggs.length () > (unsigned int) param_index)
3028 agg = &known_aggs[param_index];
3029 else
3030 agg = NULL;
3031 bool from_global_constant;
3032 t = ipa_find_agg_cst_for_param (agg,
3033 (unsigned) param_index
3034 < known_csts.length ()
3035 ? known_csts[param_index]
3036 : NULL,
3037 ie->indirect_info->offset,
3038 ie->indirect_info->by_ref,
3039 &from_global_constant);
3040 if (t
3041 && !from_global_constant
3042 && !ie->indirect_info->guaranteed_unmodified)
3043 t = NULL_TREE;
3046 else if ((unsigned) param_index < known_csts.length ())
3047 t = known_csts[param_index];
3049 if (t
3050 && TREE_CODE (t) == ADDR_EXPR
3051 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3052 return TREE_OPERAND (t, 0);
3053 else
3054 return NULL_TREE;
3057 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3058 return NULL_TREE;
3060 gcc_assert (!ie->indirect_info->agg_contents);
3061 anc_offset = ie->indirect_info->offset;
3063 t = NULL;
3065 /* Try to work out the value of the virtual table pointer from replacements. */
3066 if (!t && agg_reps && !ie->indirect_info->by_ref)
3068 while (agg_reps)
3070 if (agg_reps->index == param_index
3071 && agg_reps->offset == ie->indirect_info->offset
3072 && agg_reps->by_ref)
3074 t = agg_reps->value;
3075 break;
3077 agg_reps = agg_reps->next;
3081 /* Try to work out the value of the virtual table pointer from known
3082 aggregate values. */
3083 if (!t && known_aggs.length () > (unsigned int) param_index
3084 && !ie->indirect_info->by_ref)
3086 const ipa_agg_value_set *agg = &known_aggs[param_index];
3087 t = ipa_find_agg_cst_for_param (agg,
3088 (unsigned) param_index
3089 < known_csts.length ()
3090 ? known_csts[param_index] : NULL,
3091 ie->indirect_info->offset, true);
3094 /* If we found the virtual table pointer, look up the target. */
3095 if (t)
3097 tree vtable;
3098 unsigned HOST_WIDE_INT offset;
3099 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3101 bool can_refer;
3102 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3103 vtable, offset, &can_refer);
3104 if (can_refer)
3106 if (!target
3107 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3108 || !possible_polymorphic_call_target_p
3109 (ie, cgraph_node::get (target)))
3111 /* Do not speculate builtin_unreachable, it is stupid! */
3112 if (ie->indirect_info->vptr_changed)
3113 return NULL;
3114 target = ipa_impossible_devirt_target (ie, target);
3116 *speculative = ie->indirect_info->vptr_changed;
3117 if (!*speculative)
3118 return target;
3123 /* Do we know the constant value of the pointer? */
3124 if (!t && (unsigned) param_index < known_csts.length ())
3125 t = known_csts[param_index];
3127 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3129 ipa_polymorphic_call_context context;
3130 if (known_contexts.length () > (unsigned int) param_index)
3132 context = known_contexts[param_index];
3133 context.offset_by (anc_offset);
3134 if (ie->indirect_info->vptr_changed)
3135 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3136 ie->indirect_info->otr_type);
3137 if (t)
3139 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3140 (t, ie->indirect_info->otr_type, anc_offset);
3141 if (!ctx2.useless_p ())
3142 context.combine_with (ctx2, ie->indirect_info->otr_type);
3145 else if (t)
3147 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3148 anc_offset);
3149 if (ie->indirect_info->vptr_changed)
3150 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3151 ie->indirect_info->otr_type);
3153 else
3154 return NULL_TREE;
3156 vec <cgraph_node *>targets;
3157 bool final;
3159 targets = possible_polymorphic_call_targets
3160 (ie->indirect_info->otr_type,
3161 ie->indirect_info->otr_token,
3162 context, &final);
3163 if (!final || targets.length () > 1)
3165 struct cgraph_node *node;
3166 if (*speculative)
3167 return target;
3168 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3169 || ie->speculative || !ie->maybe_hot_p ())
3170 return NULL;
3171 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3172 ie->indirect_info->otr_token,
3173 context);
3174 if (node)
3176 *speculative = true;
3177 target = node->decl;
3179 else
3180 return NULL;
3182 else
3184 *speculative = false;
3185 if (targets.length () == 1)
3186 target = targets[0]->decl;
3187 else
3188 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3191 if (target && !possible_polymorphic_call_target_p (ie,
3192 cgraph_node::get (target)))
3194 if (*speculative)
3195 return NULL;
3196 target = ipa_impossible_devirt_target (ie, target);
3199 return target;
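/* A usage sketch (hypothetical input): given

     struct A { virtual int foo (); };
     int call (A *a) { return a->foo (); }

   once IPA-CP knows the polymorphic context of the argument of call well
   enough that possible_polymorphic_call_targets reports a single final
   target, the function above returns that target's decl so that the
   indirect call can be made direct, or only a speculative guess when the
   virtual table pointer may have changed.  */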
3202 /* If an indirect edge IE can be turned into a direct one based on data in
3203 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3204 whether the discovered target is only a speculative guess. */
3206 tree
3207 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3208 ipa_call_arg_values *avals,
3209 bool *speculative)
3211 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3212 avals->m_known_contexts,
3213 avals->m_known_aggs,
3214 NULL, speculative);
3217 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3219 tree
3220 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3221 ipa_auto_call_arg_values *avals,
3222 bool *speculative)
3224 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3225 avals->m_known_contexts,
3226 avals->m_known_aggs,
3227 NULL, speculative);
3230 /* Calculate devirtualization time bonus for NODE, assuming we know information
3231 about arguments stored in AVALS. */
3233 static int
3234 devirtualization_time_bonus (struct cgraph_node *node,
3235 ipa_auto_call_arg_values *avals)
3237 struct cgraph_edge *ie;
3238 int res = 0;
3240 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3242 struct cgraph_node *callee;
3243 class ipa_fn_summary *isummary;
3244 enum availability avail;
3245 tree target;
3246 bool speculative;
3248 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3249 if (!target)
3250 continue;
3252 /* Only bare minimum benefit for clearly un-inlineable targets. */
3253 res += 1;
3254 callee = cgraph_node::get (target);
3255 if (!callee || !callee->definition)
3256 continue;
3257 callee = callee->function_symbol (&avail);
3258 if (avail < AVAIL_AVAILABLE)
3259 continue;
3260 isummary = ipa_fn_summaries->get (callee);
3261 if (!isummary || !isummary->inlinable)
3262 continue;
3264 int size = ipa_size_summaries->get (callee)->size;
3265 /* FIXME: The values below need re-considering and perhaps also
3266 integrating into the cost metrics, at least in some very basic way. */
3267 int max_inline_insns_auto
3268 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3269 if (size <= max_inline_insns_auto / 4)
3270 res += 31 / ((int)speculative + 1);
3271 else if (size <= max_inline_insns_auto / 2)
3272 res += 15 / ((int)speculative + 1);
3273 else if (size <= max_inline_insns_auto
3274 || DECL_DECLARED_INLINE_P (callee->decl))
3275 res += 7 / ((int)speculative + 1);
3278 return res;
3281 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3283 static int
3284 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3286 int result = 0;
3287 ipa_hints hints = estimates.hints;
3288 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3289 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3291 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3293 if (hints & INLINE_HINT_loop_iterations)
3294 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3296 if (hints & INLINE_HINT_loop_stride)
3297 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3299 return result;
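/* A worked example (a sketch assuming the default
   --param ipa-cp-loop-hint-bonus of 64): if specialization makes the
   iteration counts of two loops and the stride of one loop known, the
   bonus is 64 (for having any such hint at all) + 2 * 64 + 1 * 64 = 256.  */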
3302 /* If there is a reason to penalize the function described by INFO in the
3303 cloning goodness evaluation, do so. */
3305 static inline sreal
3306 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3307 sreal evaluation)
3309 if (info->node_within_scc && !info->node_is_self_scc)
3310 evaluation = (evaluation
3311 * (100 - opt_for_fn (node->decl,
3312 param_ipa_cp_recursion_penalty))) / 100;
3314 if (info->node_calling_single_call)
3315 evaluation = (evaluation
3316 * (100 - opt_for_fn (node->decl,
3317 param_ipa_cp_single_call_penalty)))
3318 / 100;
3320 return evaluation;
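/* For instance (a sketch assuming the default
   --param ipa-cp-recursion-penalty of 40), an evaluation of 1000 for a
   node within an SCC that is not merely self-recursive becomes
   1000 * (100 - 40) / 100 = 600.  */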
3323 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3324 and SIZE_COST, the sum of frequencies of incoming edges to the potential new
3325 clone in FREQ_SUM and the sum of their profile counts in COUNT_SUM. */
3327 static bool
3328 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3329 sreal freq_sum, profile_count count_sum,
3330 int size_cost)
3332 if (time_benefit == 0
3333 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3334 || node->optimize_for_size_p ())
3335 return false;
3337 gcc_assert (size_cost > 0);
3339 ipa_node_params *info = ipa_node_params_sum->get (node);
3340 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3341 if (count_sum > profile_count::zero ())
3343 gcc_assert (base_count > profile_count::zero ());
3344 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3345 sreal evaluation = (time_benefit * factor) / size_cost;
3346 evaluation = incorporate_penalties (node, info, evaluation);
3347 evaluation *= 1000;
3349 if (dump_file && (dump_flags & TDF_DETAILS))
3351 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3352 "size: %i, count_sum: ", time_benefit.to_double (),
3353 size_cost);
3354 count_sum.dump (dump_file);
3355 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3356 info->node_within_scc
3357 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3358 info->node_calling_single_call ? ", single_call" : "",
3359 evaluation.to_double (), eval_threshold);
3362 return evaluation.to_int () >= eval_threshold;
3364 else
3366 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3367 evaluation = incorporate_penalties (node, info, evaluation);
3368 evaluation *= 1000;
3370 if (dump_file && (dump_flags & TDF_DETAILS))
3371 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3372 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3373 "threshold: %i\n",
3374 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3375 info->node_within_scc
3376 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3377 info->node_calling_single_call ? ", single_call" : "",
3378 evaluation.to_double (), eval_threshold);
3380 return evaluation.to_int () >= eval_threshold;
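/* In other words, both branches above compute, as a sketch,

     evaluation = time_benefit * scale / size_cost * 1000

   where scale is either the share of the overall profile count attributed
   to the prospective clone or the frequency sum of its incoming edges,
   reduced by the SCC and single-call penalties, and cloning is deemed
   worthwhile when the result reaches --param ipa-cp-eval-threshold.  */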
3384 /* Return all context independent values from aggregate lattices in PLATS in a
3385 vector. Return an empty vector if there are none. */
3387 static vec<ipa_agg_value>
3388 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3390 vec<ipa_agg_value> res = vNULL;
3392 if (plats->aggs_bottom
3393 || plats->aggs_contain_variable
3394 || plats->aggs_count == 0)
3395 return vNULL;
3397 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3398 aglat;
3399 aglat = aglat->next)
3400 if (aglat->is_single_const ())
3402 struct ipa_agg_value item;
3403 item.offset = aglat->offset;
3404 item.value = aglat->values->value;
3405 res.safe_push (item);
3407 return res;
3410 /* Grow vectors in AVALS and fill them with information about values of
3411 parameters that are known to be independent of the context. Only calculate
3412 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3413 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3414 parameters will be stored in it.
3416 TODO: Also grow context independent value range vectors. */
3418 static bool
3419 gather_context_independent_values (class ipa_node_params *info,
3420 ipa_auto_call_arg_values *avals,
3421 bool calculate_aggs,
3422 int *removable_params_cost)
3424 int i, count = ipa_get_param_count (info);
3425 bool ret = false;
3427 avals->m_known_vals.safe_grow_cleared (count, true);
3428 avals->m_known_contexts.safe_grow_cleared (count, true);
3429 if (calculate_aggs)
3430 avals->m_known_aggs.safe_grow_cleared (count, true);
3432 if (removable_params_cost)
3433 *removable_params_cost = 0;
3435 for (i = 0; i < count; i++)
3437 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3438 ipcp_lattice<tree> *lat = &plats->itself;
3440 if (lat->is_single_const ())
3442 ipcp_value<tree> *val = lat->values;
3443 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3444 avals->m_known_vals[i] = val->value;
3445 if (removable_params_cost)
3446 *removable_params_cost
3447 += estimate_move_cost (TREE_TYPE (val->value), false);
3448 ret = true;
3450 else if (removable_params_cost
3451 && !ipa_is_param_used (info, i))
3452 *removable_params_cost
3453 += ipa_get_param_move_cost (info, i);
3455 if (!ipa_is_param_used (info, i))
3456 continue;
3458 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3459 /* Do not account known context as reason for cloning. We can see
3460 if it permits devirtualization. */
3461 if (ctxlat->is_single_const ())
3462 avals->m_known_contexts[i] = ctxlat->values->value;
3464 if (calculate_aggs)
3466 vec<ipa_agg_value> agg_items;
3467 struct ipa_agg_value_set *agg;
3469 agg_items = context_independent_aggregate_values (plats);
3470 agg = &avals->m_known_aggs[i];
3471 agg->items = agg_items;
3472 agg->by_ref = plats->aggs_by_ref;
3473 ret |= !agg_items.is_empty ();
3477 return ret;
3480 /* Perform time and size measurement of NODE with the context given in AVALS,
3481 calculate the benefit compared to the node without specialization and store
3482 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3483 context-independent or unused removable parameters and EST_MOVE_COST, the
3484 estimated movement cost of the considered parameter. */
3486 static void
3487 perform_estimation_of_a_value (cgraph_node *node,
3488 ipa_auto_call_arg_values *avals,
3489 int removable_params_cost, int est_move_cost,
3490 ipcp_value_base *val)
3492 sreal time_benefit;
3493 ipa_call_estimates estimates;
3495 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3497 /* Extern inline functions have no cloning local time benefits because they
3498 will be inlined anyway. The only reason to clone them is if it enables
3499 optimization in any of the functions they call. */
3500 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3501 time_benefit = 0;
3502 else
3503 time_benefit = (estimates.nonspecialized_time - estimates.time)
3504 + (devirtualization_time_bonus (node, avals)
3505 + hint_time_bonus (node, estimates)
3506 + removable_params_cost + est_move_cost);
3508 int size = estimates.size;
3509 gcc_checking_assert (size >= 0);
3510 /* The inliner-heuristics based estimates may think that in certain
3511 contexts some functions do not have any size at all, but we want
3512 all specializations to have at least a tiny cost, if only to avoid
3513 dividing by zero. */
3514 if (size == 0)
3515 size = 1;
3517 val->local_time_benefit = time_benefit;
3518 val->local_size_cost = size;
3521 /* Get the overall limit of unit growth based on parameters of NODE.
3522 It does not really make sense to mix functions with different overall growth
3523 limits, but it is possible, and if it happens, we do not want to select one
3524 limit at random. */
3526 static long
3527 get_max_overall_size (cgraph_node *node)
3529 long max_new_size = orig_overall_size;
3530 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3531 if (max_new_size < large_unit)
3532 max_new_size = large_unit;
3533 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3534 max_new_size += max_new_size * unit_growth / 100 + 1;
3535 return max_new_size;
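/* E.g. (a sketch assuming --param ipa-cp-unit-growth of 10, the default at
   the time of writing): with an original overall unit size of 20000,
   specialized clones may grow the unit to at most
   20000 + 20000 * 10 / 100 + 1 = 22001, unless
   --param ipa-cp-large-unit-insns raises the base first.  */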
3538 /* Iterate over known values of parameters of NODE and estimate the local
3539 effects in terms of time and size they have. */
3541 static void
3542 estimate_local_effects (struct cgraph_node *node)
3544 ipa_node_params *info = ipa_node_params_sum->get (node);
3545 int i, count = ipa_get_param_count (info);
3546 bool always_const;
3547 int removable_params_cost;
3549 if (!count || !ipcp_versionable_function_p (node))
3550 return;
3552 if (dump_file && (dump_flags & TDF_DETAILS))
3553 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3555 ipa_auto_call_arg_values avals;
3556 always_const = gather_context_independent_values (info, &avals, true,
3557 &removable_params_cost);
3558 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3559 if (always_const || devirt_bonus
3560 || (removable_params_cost && node->can_change_signature))
3562 struct caller_statistics stats;
3563 ipa_call_estimates estimates;
3565 init_caller_stats (&stats);
3566 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3567 false);
3568 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3569 sreal time = estimates.nonspecialized_time - estimates.time;
3570 time += devirt_bonus;
3571 time += hint_time_bonus (node, estimates);
3572 time += removable_params_cost;
3573 int size = estimates.size - stats.n_calls * removable_params_cost;
3575 if (dump_file)
3576 fprintf (dump_file, " - context independent values, size: %i, "
3577 "time_benefit: %f\n", size, (time).to_double ());
3579 if (size <= 0 || node->local)
3581 info->do_clone_for_all_contexts = true;
3583 if (dump_file)
3584 fprintf (dump_file, " Decided to specialize for all "
3585 "known contexts, code not going to grow.\n");
3587 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3588 stats.count_sum, size))
3590 if (size + overall_size <= get_max_overall_size (node))
3592 info->do_clone_for_all_contexts = true;
3593 overall_size += size;
3595 if (dump_file)
3596 fprintf (dump_file, " Decided to specialize for all "
3597 "known contexts, growth (to %li) deemed "
3598 "beneficial.\n", overall_size);
3600 else if (dump_file && (dump_flags & TDF_DETAILS))
3601 fprintf (dump_file, " Not cloning for all contexts because "
3602 "maximum unit size would be reached with %li.\n",
3603 size + overall_size);
3605 else if (dump_file && (dump_flags & TDF_DETAILS))
3606 fprintf (dump_file, " Not cloning for all contexts because "
3607 "!good_cloning_opportunity_p.\n");
3611 for (i = 0; i < count; i++)
3613 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3614 ipcp_lattice<tree> *lat = &plats->itself;
3615 ipcp_value<tree> *val;
3617 if (lat->bottom
3618 || !lat->values
3619 || avals.m_known_vals[i])
3620 continue;
3622 for (val = lat->values; val; val = val->next)
3624 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3625 avals.m_known_vals[i] = val->value;
3627 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3628 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3629 emc, val);
3631 if (dump_file && (dump_flags & TDF_DETAILS))
3633 fprintf (dump_file, " - estimates for value ");
3634 print_ipcp_constant_value (dump_file, val->value);
3635 fprintf (dump_file, " for ");
3636 ipa_dump_param (dump_file, info, i);
3637 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3638 val->local_time_benefit.to_double (),
3639 val->local_size_cost);
3642 avals.m_known_vals[i] = NULL_TREE;
3645 for (i = 0; i < count; i++)
3647 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3649 if (!plats->virt_call)
3650 continue;
3652 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3653 ipcp_value<ipa_polymorphic_call_context> *val;
3655 if (ctxlat->bottom
3656 || !ctxlat->values
3657 || !avals.m_known_contexts[i].useless_p ())
3658 continue;
3660 for (val = ctxlat->values; val; val = val->next)
3662 avals.m_known_contexts[i] = val->value;
3663 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3664 0, val);
3666 if (dump_file && (dump_flags & TDF_DETAILS))
3668 fprintf (dump_file, " - estimates for polymorphic context ");
3669 print_ipcp_constant_value (dump_file, val->value);
3670 fprintf (dump_file, " for ");
3671 ipa_dump_param (dump_file, info, i);
3672 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3673 val->local_time_benefit.to_double (),
3674 val->local_size_cost);
3677 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3680 for (i = 0; i < count; i++)
3682 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3684 if (plats->aggs_bottom || !plats->aggs)
3685 continue;
3687 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3688 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3690 ipcp_value<tree> *val;
3691 if (aglat->bottom || !aglat->values
3692 /* If the following is true, the one value is in known_aggs. */
3693 || (!plats->aggs_contain_variable
3694 && aglat->is_single_const ()))
3695 continue;
3697 for (val = aglat->values; val; val = val->next)
3699 struct ipa_agg_value item;
3701 item.offset = aglat->offset;
3702 item.value = val->value;
3703 agg->items.safe_push (item);
3705 perform_estimation_of_a_value (node, &avals,
3706 removable_params_cost, 0, val);
3708 if (dump_file && (dump_flags & TDF_DETAILS))
3710 fprintf (dump_file, " - estimates for value ");
3711 print_ipcp_constant_value (dump_file, val->value);
3712 fprintf (dump_file, " for ");
3713 ipa_dump_param (dump_file, info, i);
3714 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3715 "]: time_benefit: %g, size: %i\n",
3716 plats->aggs_by_ref ? "ref " : "",
3717 aglat->offset,
3718 val->local_time_benefit.to_double (),
3719 val->local_size_cost);
3722 agg->items.pop ();
3729 /* Add value CUR_VAL and all yet-unsorted values it depends on to the
3730 topological sort of values. */
3732 template <typename valtype>
3733 void
3734 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3736 ipcp_value_source<valtype> *src;
3738 if (cur_val->dfs)
3739 return;
3741 dfs_counter++;
3742 cur_val->dfs = dfs_counter;
3743 cur_val->low_link = dfs_counter;
3745 cur_val->topo_next = stack;
3746 stack = cur_val;
3747 cur_val->on_stack = true;
3749 for (src = cur_val->sources; src; src = src->next)
3750 if (src->val)
3752 if (src->val->dfs == 0)
3754 add_val (src->val);
3755 if (src->val->low_link < cur_val->low_link)
3756 cur_val->low_link = src->val->low_link;
3758 else if (src->val->on_stack
3759 && src->val->dfs < cur_val->low_link)
3760 cur_val->low_link = src->val->dfs;
3763 if (cur_val->dfs == cur_val->low_link)
3765 ipcp_value<valtype> *v, *scc_list = NULL;
3769 v = stack;
3770 stack = v->topo_next;
3771 v->on_stack = false;
3772 v->scc_no = cur_val->dfs;
3774 v->scc_next = scc_list;
3775 scc_list = v;
3777 while (v != cur_val);
3779 cur_val->topo_next = values_topo;
3780 values_topo = cur_val;
3784 /* Add all values in lattices associated with NODE to the topological sort if
3785 they are not there yet. */
3787 static void
3788 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3790 ipa_node_params *info = ipa_node_params_sum->get (node);
3791 int i, count = ipa_get_param_count (info);
3793 for (i = 0; i < count; i++)
3795 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3796 ipcp_lattice<tree> *lat = &plats->itself;
3797 struct ipcp_agg_lattice *aglat;
3799 if (!lat->bottom)
3801 ipcp_value<tree> *val;
3802 for (val = lat->values; val; val = val->next)
3803 topo->constants.add_val (val);
3806 if (!plats->aggs_bottom)
3807 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3808 if (!aglat->bottom)
3810 ipcp_value<tree> *val;
3811 for (val = aglat->values; val; val = val->next)
3812 topo->constants.add_val (val);
3815 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3816 if (!ctxlat->bottom)
3818 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3819 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3820 topo->contexts.add_val (ctxval);
3825 /* One pass of constant propagation along the call graph edges, from callers
3826 to callees (requires topological ordering in TOPO), iterating over strongly
3827 connected components. */
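/* For illustration only: in a hypothetical call graph

     main -> f,  f -> f (self-recursive),  f -> g

   f forms a strongly connected component of its own; its lattices are
   re-propagated via the work-list stack until they stop changing, and only
   afterwards are constants propagated along f -> g, which leads out of
   the SCC.  */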
3829 static void
3830 propagate_constants_topo (class ipa_topo_info *topo)
3832 int i;
3834 for (i = topo->nnodes - 1; i >= 0; i--)
3836 unsigned j;
3837 struct cgraph_node *v, *node = topo->order[i];
3838 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3840 /* First, iteratively propagate within the strongly connected component
3841 until all lattices stabilize. */
3842 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3843 if (v->has_gimple_body_p ())
3845 if (opt_for_fn (v->decl, flag_ipa_cp)
3846 && opt_for_fn (v->decl, optimize))
3847 push_node_to_stack (topo, v);
3848 /* When V is not optimized, we cannot push it to the stack, but
3849 we still need to set the lattices of all its callees to bottom. */
3850 else
3852 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3853 propagate_constants_across_call (cs);
3857 v = pop_node_from_stack (topo);
3858 while (v)
3860 struct cgraph_edge *cs;
3861 class ipa_node_params *info = NULL;
3862 bool self_scc = true;
3864 for (cs = v->callees; cs; cs = cs->next_callee)
3865 if (ipa_edge_within_scc (cs))
3867 cgraph_node *callee = cs->callee->function_symbol ();
3869 if (v != callee)
3870 self_scc = false;
3872 if (!info)
3874 info = ipa_node_params_sum->get (v);
3875 info->node_within_scc = true;
3878 if (propagate_constants_across_call (cs))
3879 push_node_to_stack (topo, callee);
3882 if (info)
3883 info->node_is_self_scc = self_scc;
3885 v = pop_node_from_stack (topo);
3888 /* Afterwards, propagate along edges leading out of the SCC, calculate
3889 the local effects of the discovered constants and add all valid values
3890 to their topological sort. */
3891 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3892 if (v->has_gimple_body_p ()
3893 && opt_for_fn (v->decl, flag_ipa_cp)
3894 && opt_for_fn (v->decl, optimize))
3896 struct cgraph_edge *cs;
3898 estimate_local_effects (v);
3899 add_all_node_vals_to_toposort (v, topo);
3900 for (cs = v->callees; cs; cs = cs->next_callee)
3901 if (!ipa_edge_within_scc (cs))
3902 propagate_constants_across_call (cs);
3904 cycle_nodes.release ();
3908 /* Propagate the estimated effects of individual values along the topological
3909 order, from the dependent values to those they depend on. */
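/* A rough worked example with made-up numbers, assuming nothing has been
   propagated to the SCC members yet: if an SCC consists of values V1
   (local_time_benefit 10, local_size_cost 2) and V2 (5 and 1), the SCC as
   a whole is credited time 15 and size 3.  A source value of V1 reachable
   through a maybe-hot edge then has 15, multiplied by the edge frequency
   (and by a special factor in recursive cases), added to its
   prop_time_benefit, and 3 added to its prop_size_cost, the latter only
   while the sum stays below INT_MAX.  */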
3911 template <typename valtype>
3912 void
3913 value_topo_info<valtype>::propagate_effects ()
3915 ipcp_value<valtype> *base;
3916 hash_set<ipcp_value<valtype> *> processed_srcvals;
3918 for (base = values_topo; base; base = base->topo_next)
3920 ipcp_value_source<valtype> *src;
3921 ipcp_value<valtype> *val;
3922 sreal time = 0;
3923 HOST_WIDE_INT size = 0;
3925 for (val = base; val; val = val->scc_next)
3927 time = time + val->local_time_benefit + val->prop_time_benefit;
3928 size = size + val->local_size_cost + val->prop_size_cost;
3931 for (val = base; val; val = val->scc_next)
3933 processed_srcvals.empty ();
3934 for (src = val->sources; src; src = src->next)
3935 if (src->val
3936 && src->cs->maybe_hot_p ())
3938 if (!processed_srcvals.add (src->val))
3940 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3941 if (prop_size < INT_MAX)
3942 src->val->prop_size_cost = prop_size;
3943 else
3944 continue;
3947 int special_factor = 1;
3948 if (val->same_scc (src->val))
3949 special_factor
3950 = opt_for_fn(src->cs->caller->decl,
3951 param_ipa_cp_recursive_freq_factor);
3952 else if (val->self_recursion_generated_p ()
3953 && (src->cs->callee->function_symbol ()
3954 == src->cs->caller))
3956 int max_recur_gen_depth
3957 = opt_for_fn(src->cs->caller->decl,
3958 param_ipa_cp_max_recursive_depth);
3959 special_factor = max_recur_gen_depth
3960 - val->self_recursion_generated_level + 1;
3963 src->val->prop_time_benefit
3964 += time * special_factor * src->cs->sreal_frequency ();
3967 if (size < INT_MAX)
3969 val->prop_time_benefit = time;
3970 val->prop_size_cost = size;
3972 else
3974 val->prop_time_benefit = 0;
3975 val->prop_size_cost = 0;
3981 /* Callback for qsort to sort counts of all edges in descending order. */
3983 static int
3984 compare_edge_profile_counts (const void *a, const void *b)
3986 const profile_count *cnt1 = (const profile_count *) a;
3987 const profile_count *cnt2 = (const profile_count *) b;
3989 if (*cnt1 < *cnt2)
3990 return 1;
3991 if (*cnt1 > *cnt2)
3992 return -1;
3993 return 0;
3997 /* Propagate constants, polymorphic contexts and their effects from the
3998 summaries interprocedurally. */
4000 static void
4001 ipcp_propagate_stage (class ipa_topo_info *topo)
4003 struct cgraph_node *node;
4005 if (dump_file)
4006 fprintf (dump_file, "\n Propagating constants:\n\n");
4008 base_count = profile_count::uninitialized ();
4010 bool compute_count_base = false;
4011 unsigned base_count_pos_percent = 0;
4012 FOR_EACH_DEFINED_FUNCTION (node)
4014 if (node->has_gimple_body_p ()
4015 && opt_for_fn (node->decl, flag_ipa_cp)
4016 && opt_for_fn (node->decl, optimize))
4018 ipa_node_params *info = ipa_node_params_sum->get (node);
4019 determine_versionability (node, info);
4021 unsigned nlattices = ipa_get_param_count (info);
4022 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4023 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4024 initialize_node_lattices (node);
4026 ipa_size_summary *s = ipa_size_summaries->get (node);
4027 if (node->definition && !node->alias && s != NULL)
4028 overall_size += s->self_size;
4029 if (node->count.ipa ().initialized_p ())
4031 compute_count_base = true;
4032 unsigned pos_percent = opt_for_fn (node->decl,
4033 param_ipa_cp_profile_count_base);
4034 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4038 if (compute_count_base)
4040 auto_vec<profile_count> all_edge_counts;
4041 all_edge_counts.reserve_exact (symtab->edges_count);
4042 FOR_EACH_DEFINED_FUNCTION (node)
4043 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4045 profile_count count = cs->count.ipa ();
4046 if (!(count > profile_count::zero ()))
4047 continue;
4049 enum availability avail;
4050 cgraph_node *tgt
4051 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4052 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4053 if (info && info->versionable)
4054 all_edge_counts.quick_push (count);
4057 if (!all_edge_counts.is_empty ())
4059 gcc_assert (base_count_pos_percent <= 100);
4060 all_edge_counts.qsort (compare_edge_profile_counts);
4062 unsigned base_count_pos
4063 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4064 base_count = all_edge_counts[base_count_pos];
4066 if (dump_file)
4068 fprintf (dump_file, "\nSelected base_count from %u edges at "
4069 "position %u, arriving at: ", all_edge_counts.length (),
4070 base_count_pos);
4071 base_count.dump (dump_file);
4072 fprintf (dump_file, "\n");
4075 else if (dump_file)
4076 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4077 "continuing as if without profile feedback.\n");
4080 orig_overall_size = overall_size;
4082 if (dump_file)
4083 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4085 propagate_constants_topo (topo);
4086 if (flag_checking)
4087 ipcp_verify_propagated_values ();
4088 topo->constants.propagate_effects ();
4089 topo->contexts.propagate_effects ();
4091 if (dump_file)
4093 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4094 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4098 /* Discover indirect outgoing edges of NODE, a new clone with known
4099 KNOWN_CSTS, whose targets have now become known, and make them direct. */
4101 static void
4102 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4103 vec<tree> known_csts,
4104 vec<ipa_polymorphic_call_context>
4105 known_contexts,
4106 struct ipa_agg_replacement_value *aggvals)
4108 struct cgraph_edge *ie, *next_ie;
4109 bool found = false;
4111 for (ie = node->indirect_calls; ie; ie = next_ie)
4113 tree target;
4114 bool speculative;
4116 next_ie = ie->next_callee;
4117 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4118 vNULL, aggvals, &speculative);
4119 if (target)
4121 bool agg_contents = ie->indirect_info->agg_contents;
4122 bool polymorphic = ie->indirect_info->polymorphic;
4123 int param_index = ie->indirect_info->param_index;
4124 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4125 speculative);
4126 found = true;
4128 if (cs && !agg_contents && !polymorphic)
4130 ipa_node_params *info = ipa_node_params_sum->get (node);
4131 int c = ipa_get_controlled_uses (info, param_index);
4132 if (c != IPA_UNDESCRIBED_USE
4133 && !ipa_get_param_load_dereferenced (info, param_index))
4135 struct ipa_ref *to_del;
4137 c--;
4138 ipa_set_controlled_uses (info, param_index, c);
4139 if (dump_file && (dump_flags & TDF_DETAILS))
4140 fprintf (dump_file, " controlled uses count of param "
4141 "%i bumped down to %i\n", param_index, c);
4142 if (c == 0
4143 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4145 if (dump_file && (dump_flags & TDF_DETAILS))
4146 fprintf (dump_file, " and even removing its "
4147 "cloning-created reference\n");
4148 to_del->remove_reference ();
4154 /* Turning indirect calls into direct ones will improve the overall summary. */
4155 if (found)
4156 ipa_update_overall_fn_summary (node);
4159 class edge_clone_summary;
4160 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4162 /* Edge clone summary. */
4164 class edge_clone_summary
4166 public:
4167 /* Default constructor. */
4168 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4170 /* Default destructor. */
4171 ~edge_clone_summary ()
4173 if (prev_clone)
4174 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4175 if (next_clone)
4176 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4179 cgraph_edge *prev_clone;
4180 cgraph_edge *next_clone;
4183 class edge_clone_summary_t:
4184 public call_summary <edge_clone_summary *>
4186 public:
4187 edge_clone_summary_t (symbol_table *symtab):
4188 call_summary <edge_clone_summary *> (symtab)
4190 m_initialize_when_cloning = true;
4193 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4194 edge_clone_summary *src_data,
4195 edge_clone_summary *dst_data) final override;
4198 /* Edge duplication hook. */
4200 void
4201 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4202 edge_clone_summary *src_data,
4203 edge_clone_summary *dst_data)
4205 if (src_data->next_clone)
4206 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4207 dst_data->prev_clone = src_edge;
4208 dst_data->next_clone = src_data->next_clone;
4209 src_data->next_clone = dst_edge;
4212 /* Return true if CS calls DEST or its clone for all contexts. When
4213 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4214 edges from/to an all-context clone. */
4216 static bool
4217 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4218 bool allow_recursion_to_clone)
4220 enum availability availability;
4221 cgraph_node *callee = cs->callee->function_symbol (&availability);
4223 if (availability <= AVAIL_INTERPOSABLE)
4224 return false;
4225 if (callee == dest)
4226 return true;
4227 if (!allow_recursion_to_clone && cs->caller == callee)
4228 return false;
4230 ipa_node_params *info = ipa_node_params_sum->get (callee);
4231 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4234 /* Return true if edge CS does bring about the value described by SRC to
4235 DEST_VAL of node DEST or its clone for all contexts. */
4237 static bool
4238 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4239 cgraph_node *dest, ipcp_value<tree> *dest_val)
4241 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4243 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4244 || caller_info->node_dead)
4245 return false;
4247 if (!src->val)
4248 return true;
4250 if (caller_info->ipcp_orig_node)
4252 tree t;
4253 if (src->offset == -1)
4254 t = caller_info->known_csts[src->index];
4255 else
4256 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4257 return (t != NULL_TREE
4258 && values_equal_for_ipcp_p (src->val->value, t));
4260 else
4262 if (src->val == dest_val)
4263 return true;
4265 struct ipcp_agg_lattice *aglat;
4266 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4267 src->index);
4268 if (src->offset == -1)
4269 return (plats->itself.is_single_const ()
4270 && values_equal_for_ipcp_p (src->val->value,
4271 plats->itself.values->value));
4272 else
4274 if (plats->aggs_bottom || plats->aggs_contain_variable)
4275 return false;
4276 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4277 if (aglat->offset == src->offset)
4278 return (aglat->is_single_const ()
4279 && values_equal_for_ipcp_p (src->val->value,
4280 aglat->values->value));
4282 return false;
4286 /* Return true if edge CS does bring about the value described by SRC to
4287 DST_VAL of node DEST or its clone for all contexts. */
4289 static bool
4290 cgraph_edge_brings_value_p (cgraph_edge *cs,
4291 ipcp_value_source<ipa_polymorphic_call_context> *src,
4292 cgraph_node *dest,
4293 ipcp_value<ipa_polymorphic_call_context> *)
4295 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4297 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4298 || caller_info->node_dead)
4299 return false;
4300 if (!src->val)
4301 return true;
4303 if (caller_info->ipcp_orig_node)
4304 return (caller_info->known_contexts.length () > (unsigned) src->index)
4305 && values_equal_for_ipcp_p (src->val->value,
4306 caller_info->known_contexts[src->index]);
4308 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4309 src->index);
4310 return plats->ctxlat.is_single_const ()
4311 && values_equal_for_ipcp_p (src->val->value,
4312 plats->ctxlat.values->value);
4315 /* Get the next clone in the linked list of clones of an edge. */
4317 static inline struct cgraph_edge *
4318 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4320 edge_clone_summary *s = edge_clone_summaries->get (cs);
4321 return s != NULL ? s->next_clone : NULL;
4324 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4325 of them is viable and hot, return true. In that case, for those that still
4326 hold, add their edge frequency and their number and cumulative profile
4327 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4328 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4330 template <typename valtype>
4331 static bool
4332 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4333 sreal *freq_sum, int *caller_count,
4334 profile_count *rec_count_sum,
4335 profile_count *nonrec_count_sum)
4337 ipcp_value_source<valtype> *src;
4338 sreal freq = 0;
4339 int count = 0;
4340 profile_count rec_cnt = profile_count::zero ();
4341 profile_count nonrec_cnt = profile_count::zero ();
4342 bool hot = false;
4343 bool non_self_recursive = false;
4345 for (src = val->sources; src; src = src->next)
4347 struct cgraph_edge *cs = src->cs;
4348 while (cs)
4350 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4352 count++;
4353 freq += cs->sreal_frequency ();
4354 hot |= cs->maybe_hot_p ();
4355 if (cs->caller != dest)
4357 non_self_recursive = true;
4358 if (cs->count.ipa ().initialized_p ())
4359 rec_cnt += cs->count.ipa ();
4361 else if (cs->count.ipa ().initialized_p ())
4362 nonrec_cnt += cs->count.ipa ();
4364 cs = get_next_cgraph_edge_clone (cs);
4368 /* If the only edges bringing a value are self-recursive ones, do not bother
4369 evaluating it. */
4370 if (!non_self_recursive)
4371 return false;
4373 *freq_sum = freq;
4374 *caller_count = count;
4375 *rec_count_sum = rec_cnt;
4376 *nonrec_count_sum = nonrec_cnt;
4378 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4380 struct cgraph_edge *cs;
4382 /* A cold non-SCC source edge could trigger hot recursive execution of the
4383 function. Consider the case hot and rely on the following cost model
4384 computation to select the right one. */
4385 for (cs = dest->callers; cs; cs = cs->next_caller)
4386 if (cs->caller == dest && cs->maybe_hot_p ())
4387 return true;
4390 return hot;
4393 /* Given a NODE, and a set of its CALLERS, try to adjust the order of the
4394 callers to let a non-self-recursive caller be the first element. Thus, we
4395 can simplify intersecting operations on values that arrive from all of
4396 these callers, especially when a self-recursive call is among them. Return
4397 true if this kind of adjustment is possible. */
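/* For example, if the CALLERS of a value in function fn were
   { fn -> fn, main -> fn }, the two edges would be swapped so that
   main -> fn comes first and subsequent intersections start from a caller
   whose argument values do not depend on fn itself.  */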
4399 static bool
4400 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4401 cgraph_node *node)
4403 for (unsigned i = 0; i < callers.length (); i++)
4405 cgraph_edge *cs = callers[i];
4407 if (cs->caller != node)
4409 if (i > 0)
4411 callers[i] = callers[0];
4412 callers[0] = cs;
4414 return true;
4417 return false;
4420 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4421 is assumed their number is known and equal to CALLER_COUNT. */
4423 template <typename valtype>
4424 static vec<cgraph_edge *>
4425 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4426 int caller_count)
4428 ipcp_value_source<valtype> *src;
4429 vec<cgraph_edge *> ret;
4431 ret.create (caller_count);
4432 for (src = val->sources; src; src = src->next)
4434 struct cgraph_edge *cs = src->cs;
4435 while (cs)
4437 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4438 ret.quick_push (cs);
4439 cs = get_next_cgraph_edge_clone (cs);
4443 if (caller_count > 1)
4444 adjust_callers_for_value_intersection (ret, dest);
4446 return ret;
4449 /* Construct a replacement map for a known VALUE for formal parameter PARM_NUM.
4450 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4451 should be set to true when the reference created for the constant should be
4452 a load one and not an address one because the corresponding parameter p is
4453 only used as *p. */
4455 static struct ipa_replace_map *
4456 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4457 bool force_load_ref)
4459 struct ipa_replace_map *replace_map;
4461 replace_map = ggc_alloc<ipa_replace_map> ();
4462 if (dump_file)
4464 fprintf (dump_file, " replacing ");
4465 ipa_dump_param (dump_file, info, parm_num);
4467 fprintf (dump_file, " with const ");
4468 print_generic_expr (dump_file, value);
4470 if (force_load_ref)
4471 fprintf (dump_file, " - forcing load reference\n");
4472 else
4473 fprintf (dump_file, "\n");
4475 replace_map->parm_num = parm_num;
4476 replace_map->new_tree = value;
4477 replace_map->force_load_ref = force_load_ref;
4478 return replace_map;
4481 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4482 one, otherwise it will be referred to as the original node. */
4484 static void
4485 dump_profile_updates (cgraph_node *node, bool spec)
4487 if (spec)
4488 fprintf (dump_file, " setting count of the specialized node %s to ",
4489 node->dump_name ());
4490 else
4491 fprintf (dump_file, " setting count of the original node %s to ",
4492 node->dump_name ());
4494 node->count.dump (dump_file);
4495 fprintf (dump_file, "\n");
4496 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4498 fprintf (dump_file, " edge to %s has count ",
4499 cs->callee->dump_name ());
4500 cs->count.dump (dump_file);
4501 fprintf (dump_file, "\n");
4505 /* With a partial training run we do not want to assume that the original's
4506 count is zero whenever we redirect all executed edges to the clone. Simply
4507 drop the profile to a local one in this case. In any case, return the new
4508 value. ORIG_NODE is the original node and its count has not been updated yet. */
4510 profile_count
4511 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4513 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4514 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4515 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4516 remainder = remainder.guessed_local ();
4518 return remainder;
4521 /* Structure to sum counts coming from nodes other than the original node and
4522 its clones. */
4524 struct gather_other_count_struct
4526 cgraph_node *orig;
4527 profile_count other_count;
4530 /* Worker callback of call_for_symbol_thunks_and_aliases summing the
4531 counts that come from non-self-recursive calls. */
4533 static bool
4534 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4536 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4537 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4538 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4539 desc->other_count += cs->count.ipa ();
4540 return false;
4543 /* Structure to help analyze if we need to boost counts of some clones of some
4544 non-recursive edges to match the new callee count. */
4546 struct desc_incoming_count_struct
4548 cgraph_node *orig;
4549 hash_set <cgraph_edge *> *processed_edges;
4550 profile_count count;
4551 unsigned unproc_orig_rec_edges;
4554 /* Go over edges calling NODE and its thunks and gather information about
4555 incoming counts so that we know if we need to make any adjustments. */
4557 static void
4558 analyze_clone_icoming_counts (cgraph_node *node,
4559 desc_incoming_count_struct *desc)
4561 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4562 if (cs->caller->thunk)
4564 analyze_clone_icoming_counts (cs->caller, desc);
4565 continue;
4567 else
4569 if (cs->count.initialized_p ())
4570 desc->count += cs->count.ipa ();
4571 if (!desc->processed_edges->contains (cs)
4572 && cs->caller->clone_of == desc->orig)
4573 desc->unproc_orig_rec_edges++;
4577 /* If caller edge counts of a clone created for a self-recursive arithmetic
4578 jump function must be adjusted because they come from the "seed" clone
4579 for the first value and so have been excessively scaled back as if they
4580 were not recursive calls, adjust them so that the incoming counts of NODE
4581 match its count. NODE is the node or its thunk. */
4583 static void
4584 adjust_clone_incoming_counts (cgraph_node *node,
4585 desc_incoming_count_struct *desc)
4587 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4588 if (cs->caller->thunk)
4590 adjust_clone_incoming_counts (cs->caller, desc);
4591 profile_count sum = profile_count::zero ();
4592 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4593 if (e->count.initialized_p ())
4594 sum += e->count.ipa ();
4595 cs->count = cs->count.combine_with_ipa_count (sum);
4597 else if (!desc->processed_edges->contains (cs)
4598 && cs->caller->clone_of == desc->orig)
4600 cs->count += desc->count;
4601 if (dump_file)
4603 fprintf (dump_file, " Adjusted count of an incoming edge of "
4604 "a clone %s -> %s to ", cs->caller->dump_name (),
4605 cs->callee->dump_name ());
4606 cs->count.dump (dump_file);
4607 fprintf (dump_file, "\n");
4612 /* When ORIG_NODE has been cloned for values which have been generated for a
4613 self-recursive call as a result of an arithmetic pass-through
4614 jump-functions, adjust its count together with counts of all such clones in
4615 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4617 The function sums those counts of the original node and all its clones
4618 that cannot be attributed to a specific clone because they come from
4619 non-recursive edges. This sum is then evenly divided between the clones and
4620 on top of that each one gets all the counts which can be attributed directly
4621 to it. */
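/* Made-up numbers for illustration: if the redistributable sum is 900 and
   SELF_GEN_CLONES holds three nodes (the original among them), each node
   is assigned 300 plus the counts arriving on its own non-recursive
   incoming edges, and the counts of its outgoing edges are scaled
   accordingly.  */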
4623 static void
4624 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4625 const vec<cgraph_node *> &self_gen_clones)
4627 profile_count redist_sum = orig_node->count.ipa ();
4628 if (!(redist_sum > profile_count::zero ()))
4629 return;
4631 if (dump_file)
4632 fprintf (dump_file, " Updating profile of self recursive clone "
4633 "series\n");
4635 gather_other_count_struct gocs;
4636 gocs.orig = orig_node;
4637 gocs.other_count = profile_count::zero ();
4639 auto_vec <profile_count, 8> other_edges_count;
4640 for (cgraph_node *n : self_gen_clones)
4642 gocs.other_count = profile_count::zero ();
4643 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4644 &gocs, false);
4645 other_edges_count.safe_push (gocs.other_count);
4646 redist_sum -= gocs.other_count;
4649 hash_set<cgraph_edge *> processed_edges;
4650 unsigned i = 0;
4651 for (cgraph_node *n : self_gen_clones)
4653 profile_count orig_count = n->count;
4654 profile_count new_count
4655 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4656 new_count = lenient_count_portion_handling (new_count, orig_node);
4657 n->count = new_count;
4658 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4659 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4661 cs->count = cs->count.apply_scale (new_count, orig_count);
4662 processed_edges.add (cs);
4664 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4665 cs->count = cs->count.apply_scale (new_count, orig_count);
4667 i++;
4670 /* There are still going to be edges to ORIG_NODE that have one or more
4671 clones coming from another node clone in SELF_GEN_CLONES and which we
4672 scaled by the same amount, which means that the total incoming sum of
4673 counts to ORIG_NODE will be too high; scale such edges back. */
4674 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4676 if (cs->callee->ultimate_alias_target () == orig_node)
4678 unsigned den = 0;
4679 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4680 if (e->callee->ultimate_alias_target () == orig_node
4681 && processed_edges.contains (e))
4682 den++;
4683 if (den > 0)
4684 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4685 if (e->callee->ultimate_alias_target () == orig_node
4686 && processed_edges.contains (e))
4687 e->count /= den;
4691 /* Edges from the seeds of the values generated for arithmetic jump-functions
4692 along self-recursive edges are likely to have a fairly low count and so
4693 edges from them to nodes in the self_gen_clones do not correspond to the
4694 artificially distributed count of the nodes; the total sum of incoming
4695 edges to some clones might be too low. Detect this situation and correct
4696 it. */
4697 for (cgraph_node *n : self_gen_clones)
4699 if (!(n->count.ipa () > profile_count::zero ()))
4700 continue;
4702 desc_incoming_count_struct desc;
4703 desc.orig = orig_node;
4704 desc.processed_edges = &processed_edges;
4705 desc.count = profile_count::zero ();
4706 desc.unproc_orig_rec_edges = 0;
4707 analyze_clone_icoming_counts (n, &desc);
4709 if (n->count.differs_from_p (desc.count))
4711 if (n->count > desc.count
4712 && desc.unproc_orig_rec_edges > 0)
4714 desc.count = n->count - desc.count;
4715 desc.count /= desc.unproc_orig_rec_edges;
4716 adjust_clone_incoming_counts (n, &desc);
4718 else if (dump_file)
4719 fprintf (dump_file,
4720 " Unable to fix up incoming counts for %s.\n",
4721 n->dump_name ());
4725 if (dump_file)
4726 for (cgraph_node *n : self_gen_clones)
4727 dump_profile_updates (n, n != orig_node);
4728 return;
4731 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4732 their profile information to reflect this. This function should not be used
4733 for clones generated for arithmetic pass-through jump functions on a
4734 self-recursive call graph edge, that situation is handled by
4735 update_counts_for_self_gen_clones. */
4737 static void
4738 update_profiling_info (struct cgraph_node *orig_node,
4739 struct cgraph_node *new_node)
4741 struct caller_statistics stats;
4742 profile_count new_sum;
4743 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4745 if (!(orig_node_count > profile_count::zero ()))
4746 return;
4748 if (dump_file)
4750 fprintf (dump_file, " Updating profile from original count: ");
4751 orig_node_count.dump (dump_file);
4752 fprintf (dump_file, "\n");
4755 init_caller_stats (&stats, new_node);
4756 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4757 false);
4758 new_sum = stats.count_sum;
4760 if (new_sum > orig_node_count)
4762 /* TODO: Perhaps this should be gcc_unreachable ()? */
4763 remainder = profile_count::zero ().guessed_local ();
4765 else if (stats.rec_count_sum.nonzero_p ())
4767 int new_nonrec_calls = stats.n_nonrec_calls;
4768 /* There are self-recursive edges which are likely to bring in the
4769 majority of calls but which we must divide between the original and
4770 new node. */
4771 init_caller_stats (&stats, orig_node);
4772 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4773 &stats, false);
4774 int orig_nonrec_calls = stats.n_nonrec_calls;
4775 profile_count orig_nonrec_call_count = stats.count_sum;
4777 if (orig_node->local)
4779 if (!orig_nonrec_call_count.nonzero_p ())
4781 if (dump_file)
4782 fprintf (dump_file, " The original is local and the only "
4783 "incoming edges from non-dead callers with nonzero "
4784 "counts are self-recursive, assuming it is cold.\n");
4785 /* The NEW_NODE count and counts of all its outgoing edges
4786 are still unmodified copies of ORIG_NODE's. Just clear
4787 the latter and bail out. */
4788 profile_count zero;
4789 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4790 zero = profile_count::zero ().guessed_local ();
4791 else
4792 zero = profile_count::adjusted_zero ();
4793 orig_node->count = zero;
4794 for (cgraph_edge *cs = orig_node->callees;
4796 cs = cs->next_callee)
4797 cs->count = zero;
4798 for (cgraph_edge *cs = orig_node->indirect_calls;
4800 cs = cs->next_callee)
4801 cs->count = zero;
4802 return;
4805 else
4807 /* Let's behave as if there was another caller that accounts for all
4808 the calls that were either indirect or from other compilation
4809 units. */
4810 orig_nonrec_calls++;
4811 profile_count pretend_caller_count
4812 = (orig_node_count - new_sum - orig_nonrec_call_count
4813 - stats.rec_count_sum);
4814 orig_nonrec_call_count += pretend_caller_count;
4817 /* Divide all "unexplained" counts roughly proportionally to sums of
4818 counts of non-recursive calls.
4820 We put rather arbitrary limits on how many counts we claim because the
4821 sum of non-self-recursive incoming counts is only a rough guideline
4822 and there are cases (such as mcf) where using it blindly just takes
4823 too many. And if lattices are considered in the opposite order we
4824 could also take too few. */
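/* Illustrative numbers only: with unexp of 1000, new_sum of 300 and
   orig_nonrec_call_count of 100, the proportional share would be
   1000 * 300 / (300 + 100) = 750, which is then clamped to lie between
   new_nonrec_calls / limit_den and (limit_den - 1) / limit_den of the
   unexplained counts.  */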
4825 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4827 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4828 profile_count new_part
4829 = MAX(MIN (unexp.apply_scale (new_sum,
4830 new_sum + orig_nonrec_call_count),
4831 unexp.apply_scale (limit_den - 1, limit_den)),
4832 unexp.apply_scale (new_nonrec_calls, limit_den));
4833 if (dump_file)
4835 fprintf (dump_file, " Claiming ");
4836 new_part.dump (dump_file);
4837 fprintf (dump_file, " of unexplained ");
4838 unexp.dump (dump_file);
4839 fprintf (dump_file, " counts because of self-recursive "
4840 "calls\n");
4842 new_sum += new_part;
4843 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4844 orig_node);
4846 else
4847 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4848 orig_node);
4850 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4851 new_node->count = new_sum;
4852 orig_node->count = remainder;
4854 profile_count orig_new_node_count = orig_node_count;
4855 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4856 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4857 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4858 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4859 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4861 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4862 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4863 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4864 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4865 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4867 if (dump_file)
4869 dump_profile_updates (new_node, true);
4870 dump_profile_updates (orig_node, false);
4874 /* Update the respective profile of specialized NEW_NODE and the original
4875 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4876 have been redirected to the specialized version. */
4878 static void
4879 update_specialized_profile (struct cgraph_node *new_node,
4880 struct cgraph_node *orig_node,
4881 profile_count redirected_sum)
4883 struct cgraph_edge *cs;
4884 profile_count new_node_count, orig_node_count = orig_node->count;
4886 if (dump_file)
4888 fprintf (dump_file, " the sum of counts of redirected edges is ");
4889 redirected_sum.dump (dump_file);
4890 fprintf (dump_file, "\n");
4892 if (!(orig_node_count > profile_count::zero ()))
4893 return;
4895 gcc_assert (orig_node_count >= redirected_sum);
4897 new_node_count = new_node->count;
4898 new_node->count += redirected_sum;
4899 orig_node->count -= redirected_sum;
4901 for (cs = new_node->callees; cs; cs = cs->next_callee)
4902 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4904 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4906 profile_count dec = cs->count.apply_scale (redirected_sum,
4907 orig_node_count);
4908 cs->count -= dec;
4911 if (dump_file)
4913 dump_profile_updates (new_node, true);
4914 dump_profile_updates (orig_node, false);
4918 static void adjust_references_in_caller (cgraph_edge *cs,
4919 symtab_node *symbol, int index);
4921 /* Simple structure to pass a symbol and index (with the same meaning as the
4922 parameters of adjust_references_in_caller) through a void* parameter of a
4923 call_for_symbol_thunks_and_aliases callback. */
4924 struct symbol_and_index_together
4926 symtab_node *symbol;
4927 int index;
4930 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4931 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4932 static bool
4933 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4935 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4936 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4937 if (!cs->caller->thunk)
4938 adjust_references_in_caller (cs, pack->symbol, pack->index);
4939 return false;
4942 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4943 variable which is only dereferenced and which is represented by SYMBOL. See
4944 if we can remove the ADDR reference in callers associated with the call. */
4946 static void
4947 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4949 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4950 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4951 if (jfunc->type == IPA_JF_CONST)
4953 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4954 cs->lto_stmt_uid);
4955 if (!to_del)
4956 return;
4957 to_del->remove_reference ();
4958 if (dump_file)
4959 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4960 cs->caller->dump_name (), symbol->dump_name ());
4961 return;
4964 if (jfunc->type != IPA_JF_PASS_THROUGH
4965 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
4966 return;
4968 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4969 cgraph_node *caller = cs->caller;
4970 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4971 /* TODO: This consistency check may be too big and not really
4972 that useful. Consider removing it. */
4973 tree cst;
4974 if (caller_info->ipcp_orig_node)
4975 cst = caller_info->known_csts[fidx];
4976 else
4978 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4979 gcc_assert (lat->is_single_const ());
4980 cst = lat->values->value;
4982 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4983 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4984 == symbol));
4986 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4987 if (cuses == IPA_UNDESCRIBED_USE)
4988 return;
4989 gcc_assert (cuses > 0);
4990 cuses--;
4991 ipa_set_controlled_uses (caller_info, fidx, cuses);
4992 if (cuses)
4993 return;
4995 if (caller_info->ipcp_orig_node)
4997 /* The cloning machinery has created a reference here; we need to either
4998 remove it or change it to a read one. */
4999 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
5000 if (to_del && to_del->use == IPA_REF_ADDR)
5002 to_del->remove_reference ();
5003 if (dump_file)
5004 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5005 cs->caller->dump_name (), symbol->dump_name ());
5006 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5008 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5009 if (dump_file)
5010 fprintf (dump_file,
5011 " ...and replaced it with LOAD one.\n");
5016 symbol_and_index_together pack;
5017 pack.symbol = symbol;
5018 pack.index = fidx;
5019 if (caller->can_change_signature)
5020 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5021 &pack, true);
5025 /* Return true if we would like to remove a parameter from NODE when cloning it
5026 with KNOWN_CSTS scalar constants. */
5028 static bool
5029 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5031 auto_vec<bool, 16> surviving;
5032 bool filled_vec = false;
5033 ipa_node_params *info = ipa_node_params_sum->get (node);
5034 int i, count = ipa_get_param_count (info);
5036 for (i = 0; i < count; i++)
5038 if (!known_csts[i] && ipa_is_param_used (info, i))
5039 continue;
5041 if (!filled_vec)
5043 clone_info *info = clone_info::get (node);
5044 if (!info || !info->param_adjustments)
5045 return true;
5046 info->param_adjustments->get_surviving_params (&surviving);
5047 filled_vec = true;
5049 if (surviving.length () > (unsigned) i && surviving[i])
5050 return true;
5052 return false;
5055 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5056 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5057 redirect all edges in CALLERS to it. */
5059 static struct cgraph_node *
5060 create_specialized_node (struct cgraph_node *node,
5061 vec<tree> known_csts,
5062 vec<ipa_polymorphic_call_context> known_contexts,
5063 struct ipa_agg_replacement_value *aggvals,
5064 vec<cgraph_edge *> &callers)
5066 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5067 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5068 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5069 struct ipa_agg_replacement_value *av;
5070 struct cgraph_node *new_node;
5071 int i, count = ipa_get_param_count (info);
5072 clone_info *cinfo = clone_info::get (node);
5073 ipa_param_adjustments *old_adjustments = cinfo
5074 ? cinfo->param_adjustments : NULL;
5075 ipa_param_adjustments *new_adjustments;
5076 gcc_assert (!info->ipcp_orig_node);
5077 gcc_assert (node->can_change_signature
5078 || !old_adjustments);
5080 if (old_adjustments)
5082 /* At the moment all IPA optimizations should use the number of
5083 parameters of the prevailing decl as the m_always_copy_start.
5084 Handling any other value would complicate the code below, so for the
5085 time being let's only assert it is so. */
5086 gcc_assert (old_adjustments->m_always_copy_start == count
5087 || old_adjustments->m_always_copy_start < 0);
5088 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5089 for (i = 0; i < old_adj_count; i++)
5091 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5092 if (!node->can_change_signature
5093 || old_adj->op != IPA_PARAM_OP_COPY
5094 || (!known_csts[old_adj->base_index]
5095 && ipa_is_param_used (info, old_adj->base_index)))
5097 ipa_adjusted_param new_adj = *old_adj;
5099 new_adj.prev_clone_adjustment = true;
5100 new_adj.prev_clone_index = i;
5101 vec_safe_push (new_params, new_adj);
5104 bool skip_return = old_adjustments->m_skip_return;
5105 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5106 ipa_param_adjustments (new_params, count,
5107 skip_return));
5109 else if (node->can_change_signature
5110 && want_remove_some_param_p (node, known_csts))
5112 ipa_adjusted_param adj;
5113 memset (&adj, 0, sizeof (adj));
5114 adj.op = IPA_PARAM_OP_COPY;
5115 for (i = 0; i < count; i++)
5116 if (!known_csts[i] && ipa_is_param_used (info, i))
5118 adj.base_index = i;
5119 adj.prev_clone_index = i;
5120 vec_safe_push (new_params, adj);
5122 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5123 ipa_param_adjustments (new_params, count, false));
5125 else
5126 new_adjustments = NULL;
5128 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5129 for (i = callers.length () - 1; i >= 0; i--)
5131 cgraph_edge *cs = callers[i];
5132 if (cs->caller == node)
5134 self_recursive_calls.safe_push (cs);
5135 callers.unordered_remove (i);
5138 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5139 for (i = 0; i < count; i++)
5141 tree t = known_csts[i];
5142 if (!t)
5143 continue;
5145 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5147 bool load_ref = false;
5148 symtab_node *ref_symbol;
5149 if (TREE_CODE (t) == ADDR_EXPR)
5151 tree base = get_base_address (TREE_OPERAND (t, 0));
5152 if (TREE_CODE (base) == VAR_DECL
5153 && ipa_get_controlled_uses (info, i) == 0
5154 && ipa_get_param_load_dereferenced (info, i)
5155 && (ref_symbol = symtab_node::get (base)))
5157 load_ref = true;
5158 if (node->can_change_signature)
5159 for (cgraph_edge *caller : callers)
5160 adjust_references_in_caller (caller, ref_symbol, i);
5164 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5165 if (replace_map)
5166 vec_safe_push (replace_trees, replace_map);
5169 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5170 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5171 node->decl)));
5172 new_node = node->create_virtual_clone (callers, replace_trees,
5173 new_adjustments, "constprop",
5174 suffix_counter);
5175 suffix_counter++;
5177 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5178 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5180 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5181 /* Cloned edges can disappear during cloning as speculation can be
5182 resolved; check that we have one and that it comes from the last
5183 cloning. */
5184 if (cs && cs->caller == new_node)
5185 cs->redirect_callee_duplicating_thunks (new_node);
5186 /* Any future code that would make more than one clone of an outgoing
5187 edge would confuse this mechanism, so let's check that this does not
5188 happen. */
5189 gcc_checking_assert (!cs
5190 || !get_next_cgraph_edge_clone (cs)
5191 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5193 if (have_self_recursive_calls)
5194 new_node->expand_all_artificial_thunks ();
5196 ipa_set_node_agg_value_chain (new_node, aggvals);
5197 for (av = aggvals; av; av = av->next)
5198 new_node->maybe_create_reference (av->value, NULL);
5200 if (dump_file && (dump_flags & TDF_DETAILS))
5202 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5203 if (known_contexts.exists ())
5205 for (i = 0; i < count; i++)
5206 if (!known_contexts[i].useless_p ())
5208 fprintf (dump_file, " known ctx %i is ", i);
5209 known_contexts[i].dump (dump_file);
5212 if (aggvals)
5213 ipa_dump_agg_replacement_values (dump_file, aggvals);
5216 new_info = ipa_node_params_sum->get (new_node);
5217 new_info->ipcp_orig_node = node;
5218 new_node->ipcp_clone = true;
5219 new_info->known_csts = known_csts;
5220 new_info->known_contexts = known_contexts;
5222 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5224 return new_node;
5227 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5228 pass-through function to itself when the cgraph_node involved is not an
5229 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5230 no-operation pass-through. */
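/* A minimal illustration:

     fn (int i)
     {
       ...
       fn (i);
     }

   the jump function describing parameter 0 at the recursive call site is
   a no-operation pass-through of parameter 0, so this predicate holds for
   it both with and without SIMPLE.  */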
5232 static bool
5233 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5234 bool simple = true)
5236 enum availability availability;
5237 if (cs->caller == cs->callee->function_symbol (&availability)
5238 && availability > AVAIL_INTERPOSABLE
5239 && jfunc->type == IPA_JF_PASS_THROUGH
5240 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5241 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5242 && ipa_node_params_sum->get (cs->caller)
5243 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5244 return true;
5245 return false;
5248 /* Return true if JFUNC, which describes a part of an aggregate represented or
5249 pointed to by the i-th parameter of call CS, is a pass-through function to
5250 itself when the cgraph_node involved is not an IPA-CP clone. When
5251 SIMPLE is true, further check if JFUNC is a simple no-operation
5252 pass-through. */
5254 static bool
5255 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5256 int i, bool simple = true)
5258 enum availability availability;
5259 if (cs->caller == cs->callee->function_symbol (&availability)
5260 && availability > AVAIL_INTERPOSABLE
5261 && jfunc->jftype == IPA_JF_LOAD_AGG
5262 && jfunc->offset == jfunc->value.load_agg.offset
5263 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5264 && jfunc->value.pass_through.formal_id == i
5265 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5266 && ipa_node_params_sum->get (cs->caller)
5267 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5268 return true;
5269 return false;
5272 /* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
5273 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5275 static void
5276 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5277 vec<tree> &known_csts,
5278 const vec<cgraph_edge *> &callers)
5280 ipa_node_params *info = ipa_node_params_sum->get (node);
5281 int i, count = ipa_get_param_count (info);
5283 for (i = 0; i < count; i++)
5285 struct cgraph_edge *cs;
5286 tree newval = NULL_TREE;
5287 int j;
5288 bool first = true;
5289 tree type = ipa_get_type (info, i);
5291 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5292 continue;
5294 FOR_EACH_VEC_ELT (callers, j, cs)
5296 struct ipa_jump_func *jump_func;
5297 tree t;
5299 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5300 if (!args
5301 || i >= ipa_get_cs_argument_count (args)
5302 || (i == 0
5303 && call_passes_through_thunk (cs)))
5305 newval = NULL_TREE;
5306 break;
5308 jump_func = ipa_get_ith_jump_func (args, i);
5310 /* Besides a simple pass-through jump function, an arithmetic jump
5311 function could also introduce an argument-direct-pass-through for a
5312 self-feeding recursive call. For example,
5314 fn (int i)
5316 fn (i & 1);
5319 Given that i is 0, recursive propagation via (i & 1) also gets
5320 0. */
5321 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5323 gcc_assert (newval);
5324 t = ipa_get_jf_arith_result (
5325 ipa_get_jf_pass_through_operation (jump_func),
5326 newval,
5327 ipa_get_jf_pass_through_operand (jump_func),
5328 type);
5330 else
5331 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5332 jump_func, type);
5333 if (!t
5334 || (newval
5335 && !values_equal_for_ipcp_p (t, newval))
5336 || (!first && !newval))
5338 newval = NULL_TREE;
5339 break;
5341 else
5342 newval = t;
5343 first = false;
5346 if (newval)
5348 if (dump_file && (dump_flags & TDF_DETAILS))
5350 fprintf (dump_file, " adding an extra known scalar value ");
5351 print_ipcp_constant_value (dump_file, newval);
5352 fprintf (dump_file, " for ");
5353 ipa_dump_param (dump_file, info, i);
5354 fprintf (dump_file, "\n");
5357 known_csts[i] = newval;
5362 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5363 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5364 CALLERS. */
5366 static void
5367 find_more_contexts_for_caller_subset (cgraph_node *node,
5368 vec<ipa_polymorphic_call_context>
5369 *known_contexts,
5370 const vec<cgraph_edge *> &callers)
5372 ipa_node_params *info = ipa_node_params_sum->get (node);
5373 int i, count = ipa_get_param_count (info);
5375 for (i = 0; i < count; i++)
5377 cgraph_edge *cs;
5379 if (ipa_get_poly_ctx_lat (info, i)->bottom
5380 || (known_contexts->exists ()
5381 && !(*known_contexts)[i].useless_p ()))
5382 continue;
5384 ipa_polymorphic_call_context newval;
5385 bool first = true;
5386 int j;
5388 FOR_EACH_VEC_ELT (callers, j, cs)
5390 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5391 if (!args
5392 || i >= ipa_get_cs_argument_count (args))
5393 return;
5394 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5395 ipa_polymorphic_call_context ctx;
5396 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5397 cs, i, jfunc);
5398 if (first)
5400 newval = ctx;
5401 first = false;
5403 else
5404 newval.meet_with (ctx);
5405 if (newval.useless_p ())
5406 break;
5409 if (!newval.useless_p ())
5411 if (dump_file && (dump_flags & TDF_DETAILS))
5413 fprintf (dump_file, " adding an extra known polymorphic "
5414 "context ");
5415 print_ipcp_constant_value (dump_file, newval);
5416 fprintf (dump_file, " for ");
5417 ipa_dump_param (dump_file, info, i);
5418 fprintf (dump_file, "\n");
5421 if (!known_contexts->exists ())
5422 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5423 true);
5424 (*known_contexts)[i] = newval;
5430 /* Go through PLATS and create a vector of values consisting of values and
5431 offsets (minus OFFSET) of lattices that contain only a single value. */
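/* For instance, assuming lattices recording single constants 4 at bit
   offset 0 and 8 at bit offset 64, and an OFFSET of 0, the resulting
   vector would be { {offset 0, value 4}, {offset 64, value 8} }.  */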
5433 static vec<ipa_agg_value>
5434 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5436 vec<ipa_agg_value> res = vNULL;
5438 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5439 return vNULL;
5441 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5442 if (aglat->is_single_const ())
5444 struct ipa_agg_value ti;
5445 ti.offset = aglat->offset - offset;
5446 ti.value = aglat->values->value;
5447 res.safe_push (ti);
5449 return res;
5452 /* Intersect all values in INTER with single value lattices in PLATS (while
5453 subtracting OFFSET). */
5455 static void
5456 intersect_with_plats (class ipcp_param_lattices *plats,
5457 vec<ipa_agg_value> *inter,
5458 HOST_WIDE_INT offset)
5460 struct ipcp_agg_lattice *aglat;
5461 struct ipa_agg_value *item;
5462 int k;
5464 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5466 inter->release ();
5467 return;
5470 aglat = plats->aggs;
5471 FOR_EACH_VEC_ELT (*inter, k, item)
5473 bool found = false;
5474 if (!item->value)
5475 continue;
5476 while (aglat)
5478 if (aglat->offset - offset > item->offset)
5479 break;
5480 if (aglat->offset - offset == item->offset)
5482 if (aglat->is_single_const ())
5484 tree value = aglat->values->value;
5486 if (values_equal_for_ipcp_p (item->value, value))
5487 found = true;
5489 break;
5491 aglat = aglat->next;
5493 if (!found)
5494 item->value = NULL_TREE;
5498 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5499 vector result while subtracting OFFSET from the individual value offsets. */
5501 static vec<ipa_agg_value>
5502 agg_replacements_to_vector (struct cgraph_node *node, int index,
5503 HOST_WIDE_INT offset)
5505 struct ipa_agg_replacement_value *av;
5506 vec<ipa_agg_value> res = vNULL;
5508 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5509 if (av->index == index
5510 && (av->offset - offset) >= 0)
5512 struct ipa_agg_value item;
5513 gcc_checking_assert (av->value);
5514 item.offset = av->offset - offset;
5515 item.value = av->value;
5516 res.safe_push (item);
5519 return res;
5522 /* Intersect all values in INTER with those that we have already scheduled to
5523 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5524 (while subtracting OFFSET). */
5526 static void
5527 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5528 vec<ipa_agg_value> *inter,
5529 HOST_WIDE_INT offset)
5531 struct ipa_agg_replacement_value *srcvals;
5532 struct ipa_agg_value *item;
5533 int i;
5535 srcvals = ipa_get_agg_replacements_for_node (node);
5536 if (!srcvals)
5538 inter->release ();
5539 return;
5542 FOR_EACH_VEC_ELT (*inter, i, item)
5544 struct ipa_agg_replacement_value *av;
5545 bool found = false;
5546 if (!item->value)
5547 continue;
5548 for (av = srcvals; av; av = av->next)
5550 gcc_checking_assert (av->value);
5551 if (av->index == index
5552 && av->offset - offset == item->offset)
5554 if (values_equal_for_ipcp_p (item->value, av->value))
5555 found = true;
5556 break;
5559 if (!found)
5560 item->value = NULL_TREE;
5564 /* Intersect values in INTER with aggregate values that come along edge CS to
5565 parameter number INDEX and return it. If INTER does not actually exist yet,
5566 copy all incoming values to it. If we determine we ended up with no values
5567 whatsoever, return a released vector. */
5569 static vec<ipa_agg_value>
5570 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5571 vec<ipa_agg_value> inter)
5573 struct ipa_jump_func *jfunc;
5574 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5575 if (jfunc->type == IPA_JF_PASS_THROUGH
5576 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5578 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5579 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5581 if (caller_info->ipcp_orig_node)
5583 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5584 class ipcp_param_lattices *orig_plats;
5585 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5586 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5587 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5589 if (!inter.exists ())
5590 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5591 else
5592 intersect_with_agg_replacements (cs->caller, src_idx,
5593 &inter, 0);
5594 return inter;
5597 else
5599 class ipcp_param_lattices *src_plats;
5600 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5601 if (agg_pass_through_permissible_p (src_plats, jfunc))
5603 /* Currently we do not produce clobber aggregate jump
5604 functions; adjust when we do. */
5605 gcc_checking_assert (!jfunc->agg.items);
5606 if (!inter.exists ())
5607 inter = copy_plats_to_inter (src_plats, 0);
5608 else
5609 intersect_with_plats (src_plats, &inter, 0);
5610 return inter;
5614 else if (jfunc->type == IPA_JF_ANCESTOR
5615 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5617 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5618 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5619 class ipcp_param_lattices *src_plats;
5620 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5622 if (caller_info->ipcp_orig_node)
5624 if (!inter.exists ())
5625 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5626 else
5627 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5628 delta);
5630 else
5632 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5633 /* Currently we do not produce clobber aggregate jump
5634 functions, adjust when we do. */
5635 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5636 if (!inter.exists ())
5637 inter = copy_plats_to_inter (src_plats, delta);
5638 else
5639 intersect_with_plats (src_plats, &inter, delta);
5641 return inter;
5644 if (jfunc->agg.items)
5646 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5647 struct ipa_agg_value *item;
5648 int k;
5650 if (!inter.exists ())
5651 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5653 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5654 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5655 agg_item);
5656 if (value)
5658 struct ipa_agg_value agg_value;
5660 agg_value.value = value;
5661 agg_value.offset = agg_item->offset;
5662 inter.safe_push (agg_value);
5665 else
5666 FOR_EACH_VEC_ELT (inter, k, item)
5668 int l = 0;
5669 bool found = false;
5671 if (!item->value)
5672 continue;
5674 while ((unsigned) l < jfunc->agg.items->length ())
5676 struct ipa_agg_jf_item *ti;
5677 ti = &(*jfunc->agg.items)[l];
5678 if (ti->offset > item->offset)
5679 break;
5680 if (ti->offset == item->offset)
5682 tree value;
5684 /* Besides a simple pass-through aggregate jump function, an
5685 arithmetic aggregate jump function can also bring the same
5686 aggregate value as the passed-in parameter does for a
5687 self-feeding recursive call. For example,
5689 fn (int *i)
5691 int j = *i & 1;
5692 fn (&j);
5695 Given that *i is 0, recursive propagation via (*i & 1)
5696 also gets 0. */
5697 if (self_recursive_agg_pass_through_p (cs, ti, index,
5698 false))
5699 value = ipa_get_jf_arith_result (
5700 ti->value.pass_through.operation,
5701 item->value,
5702 ti->value.pass_through.operand,
5703 ti->type);
5704 else
5705 value = ipa_agg_value_from_node (caller_info,
5706 cs->caller, ti);
5708 if (value && values_equal_for_ipcp_p (item->value, value))
5709 found = true;
5710 break;
5712 l++;
5714 if (!found)
5715 item->value = NULL;
5718 else
5720 inter.release ();
5721 return vNULL;
5723 return inter;
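/* For illustration, a hypothetical situation the above intersection
   handles (a sketch only; none of these functions exist in GCC):

     struct S { int a; int b; };
     static int callee (struct S *s) { return s->a + s->b; }
     int c1 (void) { struct S s = { 1, 2 }; return callee (&s); }
     int c2 (void) { struct S s = { 1, 3 }; return callee (&s); }

   The aggregate values arriving over the edge from c1 are
   {offset 0: 1, offset 32: 2} (offsets in bits) and those from c2 are
   {offset 0: 1, offset 32: 3}.  Their intersection is just
   {offset 0: 1}, so only s->a can be treated as a known constant in a
   clone of callee shared by both callers.  */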
5726 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5727 from all of them. */
5729 static struct ipa_agg_replacement_value *
5730 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5731 const vec<cgraph_edge *> &callers)
5733 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5734 struct ipa_agg_replacement_value *res;
5735 struct ipa_agg_replacement_value **tail = &res;
5736 struct cgraph_edge *cs;
5737 int i, j, count = ipa_get_param_count (dest_info);
5739 FOR_EACH_VEC_ELT (callers, j, cs)
5741 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5742 if (!args)
5744 count = 0;
5745 break;
5747 int c = ipa_get_cs_argument_count (args);
5748 if (c < count)
5749 count = c;
5752 for (i = 0; i < count; i++)
5754 struct cgraph_edge *cs;
5755 vec<ipa_agg_value> inter = vNULL;
5756 struct ipa_agg_value *item;
5757 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5758 int j;
5760 /* Among other things, the following check should deal with all by_ref
5761 mismatches. */
5762 if (plats->aggs_bottom)
5763 continue;
5765 FOR_EACH_VEC_ELT (callers, j, cs)
5767 struct ipa_jump_func *jfunc
5768 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5769 if (self_recursive_pass_through_p (cs, jfunc, i)
5770 && (!plats->aggs_by_ref
5771 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5772 continue;
5773 inter = intersect_aggregates_with_edge (cs, i, inter);
5775 if (!inter.exists ())
5776 goto next_param;
5779 FOR_EACH_VEC_ELT (inter, j, item)
5781 struct ipa_agg_replacement_value *v;
5783 if (!item->value)
5784 continue;
5786 v = ggc_alloc<ipa_agg_replacement_value> ();
5787 v->index = i;
5788 v->offset = item->offset;
5789 v->value = item->value;
5790 v->by_ref = plats->aggs_by_ref;
5791 *tail = v;
5792 tail = &v->next;
5795 next_param:
5796 if (inter.exists ())
5797 inter.release ();
5799 *tail = NULL;
5800 return res;
5803 /* Determine whether CS also brings all scalar values that NODE is
5804 specialized for. */
5806 static bool
5807 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5808 struct cgraph_node *node)
5810 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5811 int count = ipa_get_param_count (dest_info);
5812 class ipa_node_params *caller_info;
5813 class ipa_edge_args *args;
5814 int i;
5816 caller_info = ipa_node_params_sum->get (cs->caller);
5817 args = ipa_edge_args_sum->get (cs);
5818 for (i = 0; i < count; i++)
5820 struct ipa_jump_func *jump_func;
5821 tree val, t;
5823 val = dest_info->known_csts[i];
5824 if (!val)
5825 continue;
5827 if (i >= ipa_get_cs_argument_count (args))
5828 return false;
5829 jump_func = ipa_get_ith_jump_func (args, i);
5830 t = ipa_value_from_jfunc (caller_info, jump_func,
5831 ipa_get_type (dest_info, i));
5832 if (!t || !values_equal_for_ipcp_p (val, t))
5833 return false;
5835 return true;
5838 /* Determine whether CS also brings all aggregate values that NODE is
5839 specialized for. */
5840 static bool
5841 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5842 struct cgraph_node *node)
5844 struct ipa_agg_replacement_value *aggval;
5845 int i, ec, count;
5847 aggval = ipa_get_agg_replacements_for_node (node);
5848 if (!aggval)
5849 return true;
5851 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5852 count = ipa_get_param_count (clone_node_info);
5853 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5854 if (ec < count)
5855 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5856 if (av->index >= ec)
5857 return false;
5859 ipa_node_params *orig_node_info
5860 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5862 for (i = 0; i < count; i++)
5864 class ipcp_param_lattices *plats;
5865 bool interesting = false;
5866 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5867 if (av->index == i)
5869 interesting = true;
5870 break;
5872 if (!interesting)
5873 continue;
5875 plats = ipa_get_parm_lattices (orig_node_info, i);
5876 if (plats->aggs_bottom)
5877 return false;
5879 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5880 if (!values.exists ())
5881 return false;
5883 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5884 if (av->index == i)
5886 struct ipa_agg_value *item;
5887 int j;
5888 bool found = false;
5889 FOR_EACH_VEC_ELT (values, j, item)
5890 if (item->value
5891 && item->offset == av->offset
5892 && values_equal_for_ipcp_p (item->value, av->value))
5894 found = true;
5895 break;
5897 if (!found)
5899 values.release ();
5900 return false;
5903 values.release ();
5905 return true;
5908 /* Given an original NODE and a VAL for which we have already created a
5909 specialized clone, look whether there are incoming edges that still lead
5910 into the old node but now also bring the requested value and conform to
5911 all other criteria, such that they can be redirected to the specialized
5912 node. This function can therefore redirect the final edge in an SCC. */
5914 template <typename valtype>
5915 static void
5916 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5918 ipcp_value_source<valtype> *src;
5919 profile_count redirected_sum = profile_count::zero ();
5921 for (src = val->sources; src; src = src->next)
5923 struct cgraph_edge *cs = src->cs;
5924 while (cs)
5926 if (cgraph_edge_brings_value_p (cs, src, node, val)
5927 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5928 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5930 if (dump_file)
5931 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5932 cs->caller->dump_name (),
5933 val->spec_node->dump_name ());
5935 cs->redirect_callee_duplicating_thunks (val->spec_node);
5936 val->spec_node->expand_all_artificial_thunks ();
5937 if (cs->count.ipa ().initialized_p ())
5938 redirected_sum = redirected_sum + cs->count.ipa ();
5940 cs = get_next_cgraph_edge_clone (cs);
5944 if (redirected_sum.nonzero_p ())
5945 update_specialized_profile (val->spec_node, node, redirected_sum);
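/* A hypothetical illustration of the SCC case handled above (a sketch
   only; the function is made up):

     static int f (int x, int y)
     {
       if (x <= 0)
         return y;
       return f (x - 1, 7);
     }

   If all outside callers invoke f (n, 7), a clone specialized for
   y == 7 is created first while the recursive edge still points to the
   original f.  Only once the clone exists can that remaining edge be
   recognized as also bringing y == 7 and be redirected to the clone,
   closing the recursive cycle.  */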
5948 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5950 static bool
5951 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5953 ipa_polymorphic_call_context *ctx;
5954 int i;
5956 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5957 if (!ctx->useless_p ())
5958 return true;
5959 return false;
5962 /* Return a copy of KNOWN_CONTEXTS if at least one context in it is useful,
otherwise return vNULL. */
5964 static vec<ipa_polymorphic_call_context>
5965 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5967 if (known_contexts_useful_p (known_contexts))
5968 return known_contexts.copy ();
5969 else
5970 return vNULL;
5973 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5974 according to VAL and INDEX. If the known contexts are useful, store a
5975 copy of them into KNOWN_CONTEXTS too. */
5977 static void
5978 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5979 vec<tree> *known_csts,
5980 vec<ipa_polymorphic_call_context> *known_contexts,
5981 ipcp_value<tree> *val, int index)
5983 *known_csts = avals->m_known_vals.copy ();
5984 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5985 (*known_csts)[index] = val->value;
5988 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5989 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5990 INDEX. */
5992 static void
5993 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5994 vec<tree> *known_csts,
5995 vec<ipa_polymorphic_call_context> *known_contexts,
5996 ipcp_value<ipa_polymorphic_call_context> *val,
5997 int index)
5999 *known_csts = avals->m_known_vals.copy ();
6000 *known_contexts = avals->m_known_contexts.copy ();
6001 (*known_contexts)[index] = val->value;
6004 /* Return true if OFFSET indicates this was not an aggregate value or there is
6005 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6006 AGGVALS list. */
6008 DEBUG_FUNCTION bool
6009 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
6010 int index, HOST_WIDE_INT offset, tree value)
6012 if (offset == -1)
6013 return true;
6015 while (aggvals)
6017 if (aggvals->index == index
6018 && aggvals->offset == offset
6019 && values_equal_for_ipcp_p (aggvals->value, value))
6020 return true;
6021 aggvals = aggvals->next;
6023 return false;
6026 /* Return true if OFFSET is minus one, which it always should be here,
6027 because the source of a polymorphic context cannot be an aggregate value. */
6029 DEBUG_FUNCTION bool
6030 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6031 int , HOST_WIDE_INT offset,
6032 ipa_polymorphic_call_context)
6034 return offset == -1;
6037 /* Decide whether to create a special version of NODE for value VAL of
6038 parameter at the given INDEX. If OFFSET is -1, the value is for the
6039 parameter itself, otherwise it is stored at the given OFFSET of the
6040 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6041 is a vector which contains clones created for self-recursive calls with an
6042 arithmetic pass-through jump function. */
6044 template <typename valtype>
6045 static bool
6046 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6047 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6048 vec<cgraph_node *> *self_gen_clones)
6050 struct ipa_agg_replacement_value *aggvals;
6051 int caller_count;
6052 sreal freq_sum;
6053 profile_count count_sum, rec_count_sum;
6054 vec<cgraph_edge *> callers;
6056 if (val->spec_node)
6058 perhaps_add_new_callers (node, val);
6059 return false;
6061 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6063 if (dump_file && (dump_flags & TDF_DETAILS))
6064 fprintf (dump_file, " Ignoring candidate value because "
6065 "maximum unit size would be reached with %li.\n",
6066 val->local_size_cost + overall_size);
6067 return false;
6069 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6070 &rec_count_sum, &count_sum))
6071 return false;
6073 if (!dbg_cnt (ipa_cp_values))
6074 return false;
6076 if (val->self_recursion_generated_p ())
6078 /* The edge counts in this case might not have been adjusted yet.
6079 Nevertheless, even if they had been, the result would only be guesswork,
6080 which we can just as well do now. The recursive part of the counts can
6081 be derived from the count of the original node anyway. */
6082 if (node->count.ipa ().nonzero_p ())
6084 unsigned dem = self_gen_clones->length () + 1;
6085 rec_count_sum = node->count.ipa () / dem;
6087 else
6088 rec_count_sum = profile_count::zero ();
6091 /* get_info_about_necessary_edges only sums up ipa counts. */
6092 count_sum += rec_count_sum;
6094 if (dump_file && (dump_flags & TDF_DETAILS))
6096 fprintf (dump_file, " - considering value ");
6097 print_ipcp_constant_value (dump_file, val->value);
6098 fprintf (dump_file, " for ");
6099 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6100 if (offset != -1)
6101 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6102 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6105 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6106 freq_sum, count_sum,
6107 val->local_size_cost)
6108 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6109 freq_sum, count_sum, val->prop_size_cost))
6110 return false;
6112 if (dump_file)
6113 fprintf (dump_file, " Creating a specialized node of %s.\n",
6114 node->dump_name ());
6116 vec<tree> known_csts;
6117 vec<ipa_polymorphic_call_context> known_contexts;
6119 callers = gather_edges_for_value (val, node, caller_count);
6120 if (offset == -1)
6121 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6122 else
6124 known_csts = avals->m_known_vals.copy ();
6125 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6127 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6128 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6129 aggvals = find_aggregate_values_for_callers_subset (node, callers);
6130 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6131 offset, val->value));
6132 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6133 aggvals, callers);
6135 if (val->self_recursion_generated_p ())
6136 self_gen_clones->safe_push (val->spec_node);
6137 else
6138 update_profiling_info (node, val->spec_node);
6140 callers.release ();
6141 overall_size += val->local_size_cost;
6142 if (dump_file && (dump_flags & TDF_DETAILS))
6143 fprintf (dump_file, " overall size reached %li\n",
6144 overall_size);
6146 /* TODO: If for some lattice there is only one other known value
6147 left, make a special node for it too. */
6149 return true;
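/* A hypothetical sketch of the transformation a positive decision above
   enables (illustrative only; the clone name is made up):

     int f (int x) { return x * x; }

   If the gathered subset of callers all pass 4, the new clone
   "f.constprop" knows that x == 4, so later passes can fold its body to
   "return 16;", and those callers are redirected to the clone.  The
   remaining callers keep calling the unspecialized f.  */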
6152 /* Decide whether and what specialized clones of NODE should be created. */
6154 static bool
6155 decide_whether_version_node (struct cgraph_node *node)
6157 ipa_node_params *info = ipa_node_params_sum->get (node);
6158 int i, count = ipa_get_param_count (info);
6159 bool ret = false;
6161 if (count == 0)
6162 return false;
6164 if (dump_file && (dump_flags & TDF_DETAILS))
6165 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6166 node->dump_name ());
6168 auto_vec <cgraph_node *, 9> self_gen_clones;
6169 ipa_auto_call_arg_values avals;
6170 gather_context_independent_values (info, &avals, false, NULL);
6172 for (i = 0; i < count; i++)
6174 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6175 ipcp_lattice<tree> *lat = &plats->itself;
6176 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6178 if (!lat->bottom
6179 && !avals.m_known_vals[i])
6181 ipcp_value<tree> *val;
6182 for (val = lat->values; val; val = val->next)
6184 /* If some values generated for self-recursive calls with
6185 arithmetic jump functions fall outside of the known
6186 value_range for the parameter, we can skip them. The VR
6187 interface supports this only for integers now. */
6188 if (TREE_CODE (val->value) == INTEGER_CST
6189 && !plats->m_value_range.bottom_p ()
6190 && !plats->m_value_range.m_vr.contains_p (val->value))
6192 /* This can also happen if a constant present in the source
6193 code falls outside the range of the parameter's type, so we
6194 cannot assert. */
6195 if (dump_file && (dump_flags & TDF_DETAILS))
6197 fprintf (dump_file, " - skipping%s value ",
6198 val->self_recursion_generated_p ()
6199 ? " self_recursion_generated" : "");
6200 print_ipcp_constant_value (dump_file, val->value);
6201 fprintf (dump_file, " because it is outside known "
6202 "value range.\n");
6204 continue;
6206 ret |= decide_about_value (node, i, -1, val, &avals,
6207 &self_gen_clones);
6211 if (!plats->aggs_bottom)
6213 struct ipcp_agg_lattice *aglat;
6214 ipcp_value<tree> *val;
6215 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6216 if (!aglat->bottom && aglat->values
6217 /* If the following is false, the one value has been considered
6218 for cloning for all contexts. */
6219 && (plats->aggs_contain_variable
6220 || !aglat->is_single_const ()))
6221 for (val = aglat->values; val; val = val->next)
6222 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6223 &self_gen_clones);
6226 if (!ctxlat->bottom
6227 && avals.m_known_contexts[i].useless_p ())
6229 ipcp_value<ipa_polymorphic_call_context> *val;
6230 for (val = ctxlat->values; val; val = val->next)
6231 ret |= decide_about_value (node, i, -1, val, &avals,
6232 &self_gen_clones);
6236 if (!self_gen_clones.is_empty ())
6238 self_gen_clones.safe_push (node);
6239 update_counts_for_self_gen_clones (node, self_gen_clones);
6242 if (info->do_clone_for_all_contexts)
6244 if (!dbg_cnt (ipa_cp_values))
6246 info->do_clone_for_all_contexts = false;
6247 return ret;
6250 struct cgraph_node *clone;
6251 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6253 for (int i = callers.length () - 1; i >= 0; i--)
6255 cgraph_edge *cs = callers[i];
6256 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6258 if (caller_info && caller_info->node_dead)
6259 callers.unordered_remove (i);
6262 if (!adjust_callers_for_value_intersection (callers, node))
6264 /* If the node is not called by anyone, or all its caller edges
6265 are self-recursive, the node is not really in use, so there is
6266 no need to do any cloning. */
6267 info->do_clone_for_all_contexts = false;
6268 return ret;
6271 if (dump_file)
6272 fprintf (dump_file, " - Creating a specialized node of %s "
6273 "for all known contexts.\n", node->dump_name ());
6275 vec<tree> known_csts = avals.m_known_vals.copy ();
6276 vec<ipa_polymorphic_call_context> known_contexts
6277 = copy_useful_known_contexts (avals.m_known_contexts);
6278 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6279 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6280 ipa_agg_replacement_value *aggvals
6281 = find_aggregate_values_for_callers_subset (node, callers);
6283 if (!known_contexts_useful_p (known_contexts))
6285 known_contexts.release ();
6286 known_contexts = vNULL;
6288 clone = create_specialized_node (node, known_csts, known_contexts,
6289 aggvals, callers);
6290 info->do_clone_for_all_contexts = false;
6291 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6292 ret = true;
6295 return ret;
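/* An illustration of the "all known contexts" case above (a hypothetical
   example, not from GCC sources):

     static int f (int x, int flag) { return flag ? x : -x; }
     int a (int x) { return f (x, 1); }
     int b (int x) { return f (x + 2, 1); }

   No single value of x is shared by all callers, but flag == 1 is, so a
   single clone of f specialized for everything common to all callers can
   fold away the conditional; a and b are redirected to it, and the
   original f typically becomes unreachable and is removed.  */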
6298 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6300 static void
6301 spread_undeadness (struct cgraph_node *node)
6303 struct cgraph_edge *cs;
6305 for (cs = node->callees; cs; cs = cs->next_callee)
6306 if (ipa_edge_within_scc (cs))
6308 struct cgraph_node *callee;
6309 class ipa_node_params *info;
6311 callee = cs->callee->function_symbol (NULL);
6312 info = ipa_node_params_sum->get (callee);
6314 if (info && info->node_dead)
6316 info->node_dead = 0;
6317 spread_undeadness (callee);
6322 /* Return true if NODE has a caller from outside of its SCC that is not
6323 dead. Worker callback for cgraph_node::call_for_symbol_thunks_and_aliases. */
6325 static bool
6326 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6327 void *data ATTRIBUTE_UNUSED)
6329 struct cgraph_edge *cs;
6331 for (cs = node->callers; cs; cs = cs->next_caller)
6332 if (cs->caller->thunk
6333 && cs->caller->call_for_symbol_thunks_and_aliases
6334 (has_undead_caller_from_outside_scc_p, NULL, true))
6335 return true;
6336 else if (!ipa_edge_within_scc (cs))
6338 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6339 if (!caller_info /* Unoptimized callers are like dead ones. */
6340 || !caller_info->node_dead)
6341 return true;
6343 return false;
6347 /* Identify nodes within the same SCC as NODE which are no longer needed
6348 because of new clones and will be removed as unreachable. */
6350 static void
6351 identify_dead_nodes (struct cgraph_node *node)
6353 struct cgraph_node *v;
6354 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6355 if (v->local)
6357 ipa_node_params *info = ipa_node_params_sum->get (v);
6358 if (info
6359 && !v->call_for_symbol_thunks_and_aliases
6360 (has_undead_caller_from_outside_scc_p, NULL, true))
6361 info->node_dead = 1;
6364 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6366 ipa_node_params *info = ipa_node_params_sum->get (v);
6367 if (info && !info->node_dead)
6368 spread_undeadness (v);
6371 if (dump_file && (dump_flags & TDF_DETAILS))
6373 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6374 if (ipa_node_params_sum->get (v)
6375 && ipa_node_params_sum->get (v)->node_dead)
6376 fprintf (dump_file, " Marking node as dead: %s.\n",
6377 v->dump_name ());
6381 /* The decision stage. Iterate over the topological order of call graph nodes
6382 TOPO and make specialized clones if deemed beneficial. */
6384 static void
6385 ipcp_decision_stage (class ipa_topo_info *topo)
6387 int i;
6389 if (dump_file)
6390 fprintf (dump_file, "\nIPA decision stage:\n\n");
6392 for (i = topo->nnodes - 1; i >= 0; i--)
6394 struct cgraph_node *node = topo->order[i];
6395 bool change = false, iterate = true;
6397 while (iterate)
6399 struct cgraph_node *v;
6400 iterate = false;
6401 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6402 if (v->has_gimple_body_p ()
6403 && ipcp_versionable_function_p (v))
6404 iterate |= decide_whether_version_node (v);
6406 change |= iterate;
6408 if (change)
6409 identify_dead_nodes (node);
6413 /* Look up all the bits information that we have discovered and copy it over
6414 to the transformation summary. */
6416 static void
6417 ipcp_store_bits_results (void)
6419 cgraph_node *node;
6421 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6423 ipa_node_params *info = ipa_node_params_sum->get (node);
6424 bool dumped_sth = false;
6425 bool found_useful_result = false;
6427 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6429 if (dump_file)
6430 fprintf (dump_file, "Not considering %s for ipa bitwise propagation; "
6431 "-fipa-bit-cp: disabled.\n",
6432 node->dump_name ());
6433 continue;
6436 if (info->ipcp_orig_node)
6437 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6438 if (!info->lattices)
6439 /* Newly expanded artificial thunks do not have lattices. */
6440 continue;
6442 unsigned count = ipa_get_param_count (info);
6443 for (unsigned i = 0; i < count; i++)
6445 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6446 if (plats->bits_lattice.constant_p ())
6448 found_useful_result = true;
6449 break;
6453 if (!found_useful_result)
6454 continue;
6456 ipcp_transformation_initialize ();
6457 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6458 vec_safe_reserve_exact (ts->bits, count);
6460 for (unsigned i = 0; i < count; i++)
6462 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6463 ipa_bits *jfbits;
6465 if (plats->bits_lattice.constant_p ())
6467 jfbits
6468 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6469 plats->bits_lattice.get_mask ());
6470 if (!dbg_cnt (ipa_cp_bits))
6471 jfbits = NULL;
6473 else
6474 jfbits = NULL;
6476 ts->bits->quick_push (jfbits);
6477 if (!dump_file || !jfbits)
6478 continue;
6479 if (!dumped_sth)
6481 fprintf (dump_file, "Propagated bits info for function %s:\n",
6482 node->dump_name ());
6483 dumped_sth = true;
6485 fprintf (dump_file, " param %i: value = ", i);
6486 print_hex (jfbits->value, dump_file);
6487 fprintf (dump_file, ", mask = ");
6488 print_hex (jfbits->mask, dump_file);
6489 fprintf (dump_file, "\n");
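/* For illustration (hypothetical numbers): if every caller passes a
   value of the form (n << 4) | 1 for some parameter, its bits lattice
   ends up with value = 0x1 and mask = ~(HOST_WIDE_INT) 0xf.  A set bit
   in the mask means that bit is unknown; a clear bit means it is known
   to equal the corresponding bit of value.  The transformation phase can
   therefore tell the rest of the compiler that the low four bits of the
   parameter are always 0001.  */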
6494 /* Look up all VR information that we have discovered and copy it over
6495 to the transformation summary. */
6497 static void
6498 ipcp_store_vr_results (void)
6500 cgraph_node *node;
6502 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6504 ipa_node_params *info = ipa_node_params_sum->get (node);
6505 bool found_useful_result = false;
6507 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6509 if (dump_file)
6510 fprintf (dump_file, "Not considering %s for VR discovery "
6511 "and propagate; -fipa-ipa-vrp: disabled.\n",
6512 node->dump_name ());
6513 continue;
6516 if (info->ipcp_orig_node)
6517 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6518 if (!info->lattices)
6519 /* Newly expanded artificial thunks do not have lattices. */
6520 continue;
6522 unsigned count = ipa_get_param_count (info);
6523 for (unsigned i = 0; i < count; i++)
6525 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6526 if (!plats->m_value_range.bottom_p ()
6527 && !plats->m_value_range.top_p ())
6529 found_useful_result = true;
6530 break;
6533 if (!found_useful_result)
6534 continue;
6536 ipcp_transformation_initialize ();
6537 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6538 vec_safe_reserve_exact (ts->m_vr, count);
6540 for (unsigned i = 0; i < count; i++)
6542 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6543 ipa_vr vr;
6545 if (!plats->m_value_range.bottom_p ()
6546 && !plats->m_value_range.top_p ()
6547 && dbg_cnt (ipa_cp_vr))
6549 vr.known = true;
6550 vr.type = plats->m_value_range.m_vr.kind ();
6551 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6552 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6554 else
6556 vr.known = false;
6557 vr.type = VR_VARYING;
6558 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6560 ts->m_vr->quick_push (vr);
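/* For illustration (hypothetical): if one caller passes 3 and another
   passes 7 for the same parameter, the entry stored above has
   vr.known == true, vr.type == VR_RANGE and bounds [3, 7], which the
   transformation phase can later hand to the rest of the compiler,
   e.g. letting VRP fold a "param > 10" test in the function body to
   false.  */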
6565 /* The IPCP driver. */
6567 static unsigned int
6568 ipcp_driver (void)
6570 class ipa_topo_info topo;
6572 if (edge_clone_summaries == NULL)
6573 edge_clone_summaries = new edge_clone_summary_t (symtab);
6575 ipa_check_create_node_params ();
6576 ipa_check_create_edge_args ();
6577 clone_num_suffixes = new hash_map<const char *, unsigned>;
6579 if (dump_file)
6581 fprintf (dump_file, "\nIPA structures before propagation:\n");
6582 if (dump_flags & TDF_DETAILS)
6583 ipa_print_all_params (dump_file);
6584 ipa_print_all_jump_functions (dump_file);
6587 /* Topological sort. */
6588 build_toporder_info (&topo);
6589 /* Do the interprocedural propagation. */
6590 ipcp_propagate_stage (&topo);
6591 /* Decide what constant propagation and cloning should be performed. */
6592 ipcp_decision_stage (&topo);
6593 /* Store results of bits propagation. */
6594 ipcp_store_bits_results ();
6595 /* Store results of value range propagation. */
6596 ipcp_store_vr_results ();
6598 /* Free all IPCP structures. */
6599 delete clone_num_suffixes;
6600 free_toporder_info (&topo);
6601 delete edge_clone_summaries;
6602 edge_clone_summaries = NULL;
6603 ipa_free_all_structures_after_ipa_cp ();
6604 if (dump_file)
6605 fprintf (dump_file, "\nIPA constant propagation end\n");
6606 return 0;
6609 /* Initialization and computation of IPCP data structures. This is the initial
6610 intraprocedural analysis of functions, which gathers information to be
6611 propagated later on. */
6613 static void
6614 ipcp_generate_summary (void)
6616 struct cgraph_node *node;
6618 if (dump_file)
6619 fprintf (dump_file, "\nIPA constant propagation start:\n");
6620 ipa_register_cgraph_hooks ();
6622 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6623 ipa_analyze_node (node);
6626 namespace {
6628 const pass_data pass_data_ipa_cp =
6630 IPA_PASS, /* type */
6631 "cp", /* name */
6632 OPTGROUP_NONE, /* optinfo_flags */
6633 TV_IPA_CONSTANT_PROP, /* tv_id */
6634 0, /* properties_required */
6635 0, /* properties_provided */
6636 0, /* properties_destroyed */
6637 0, /* todo_flags_start */
6638 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6641 class pass_ipa_cp : public ipa_opt_pass_d
6643 public:
6644 pass_ipa_cp (gcc::context *ctxt)
6645 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6646 ipcp_generate_summary, /* generate_summary */
6647 NULL, /* write_summary */
6648 NULL, /* read_summary */
6649 ipcp_write_transformation_summaries, /*
6650 write_optimization_summary */
6651 ipcp_read_transformation_summaries, /*
6652 read_optimization_summary */
6653 NULL, /* stmt_fixup */
6654 0, /* function_transform_todo_flags_start */
6655 ipcp_transform_function, /* function_transform */
6656 NULL) /* variable_transform */
6659 /* opt_pass methods: */
6660 bool gate (function *) final override
6662 /* FIXME: We should remove the optimize check after we ensure we never run
6663 IPA passes when not optimizing. */
6664 return (flag_ipa_cp && optimize) || in_lto_p;
6667 unsigned int execute (function *) final override { return ipcp_driver (); }
6669 }; // class pass_ipa_cp
6671 } // anon namespace
6673 ipa_opt_pass_d *
6674 make_pass_ipa_cp (gcc::context *ctxt)
6676 return new pass_ipa_cp (ctxt);
6679 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6680 within the same process. For use by toplev::finalize. */
6682 void
6683 ipa_cp_cc_finalize (void)
6685 base_count = profile_count::uninitialized ();
6686 overall_size = 0;
6687 orig_overall_size = 0;
6688 ipcp_free_transformation_sum ();