Support TI mode and soft float on PA64
[official-gcc.git] / gcc / ipa-cp.c
blob703541d15cc275244ca6418efa3c88f7ae53c84c
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
34 is deemed good.
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 third stage.
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.c
100 and tree-inline.c) according to instructions inserted to the call graph by
101 the second stage. */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "tree.h"
108 #include "gimple-expr.h"
109 #include "gimple.h"
110 #include "predict.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
126 #include "attribs.h"
127 #include "dbgcnt.h"
128 #include "symtab-clones.h"
130 template <typename valtype> class ipcp_value;
132 /* Describes a particular source for an IPA-CP value. */
134 template <typename valtype>
135 struct ipcp_value_source
137 public:
138 /* Aggregate offset of the source, negative if the source is scalar value of
139 the argument itself. */
140 HOST_WIDE_INT offset;
141 /* The incoming edge that brought the value. */
142 cgraph_edge *cs;
143 /* If the jump function that resulted into his value was a pass-through or an
144 ancestor, this is the ipcp_value of the caller from which the described
145 value has been derived. Otherwise it is NULL. */
146 ipcp_value<valtype> *val;
147 /* Next pointer in a linked list of sources of a value. */
148 ipcp_value_source *next;
149 /* If the jump function that resulted into his value was a pass-through or an
150 ancestor, this is the index of the parameter of the caller the jump
151 function references. */
152 int index;
155 /* Common ancestor for all ipcp_value instantiations. */
157 class ipcp_value_base
159 public:
160 /* Time benefit and that specializing the function for this value would bring
161 about in this function alone. */
162 sreal local_time_benefit;
163 /* Time benefit that specializing the function for this value can bring about
164 in it's callees. */
165 sreal prop_time_benefit;
166 /* Size cost that specializing the function for this value would bring about
167 in this function alone. */
168 int local_size_cost;
169 /* Size cost that specializing the function for this value can bring about in
170 it's callees. */
171 int prop_size_cost;
173 ipcp_value_base ()
174 : local_time_benefit (0), prop_time_benefit (0),
175 local_size_cost (0), prop_size_cost (0) {}
178 /* Describes one particular value stored in struct ipcp_lattice. */
180 template <typename valtype>
181 class ipcp_value : public ipcp_value_base
183 public:
184 /* The actual value for the given parameter. */
185 valtype value;
186 /* The list of sources from which this value originates. */
187 ipcp_value_source <valtype> *sources = nullptr;
188 /* Next pointers in a linked list of all values in a lattice. */
189 ipcp_value *next = nullptr;
190 /* Next pointers in a linked list of values in a strongly connected component
191 of values. */
192 ipcp_value *scc_next = nullptr;
193 /* Next pointers in a linked list of SCCs of values sorted topologically
194 according their sources. */
195 ipcp_value *topo_next = nullptr;
196 /* A specialized node created for this value, NULL if none has been (so far)
197 created. */
198 cgraph_node *spec_node = nullptr;
199 /* Depth first search number and low link for topological sorting of
200 values. */
201 int dfs = 0;
202 int low_link = 0;
203 /* SCC number to identify values which recursively feed into each other.
204 Values in the same SCC have the same SCC number. */
205 int scc_no = 0;
206 /* Non zero if the value is generated from another value in the same lattice
207 for a self-recursive call, the actual number is how many times the
208 operation has been performed. In the unlikely event of the value being
209 present in two chains fo self-recursive value generation chains, it is the
210 maximum. */
211 unsigned self_recursion_generated_level = 0;
212 /* True if this value is currently on the topo-sort stack. */
213 bool on_stack = false;
215 void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
216 HOST_WIDE_INT offset);
218 /* Return true if both THIS value and O feed into each other. */
220 bool same_scc (const ipcp_value<valtype> *o)
222 return o->scc_no == scc_no;
225 /* Return true, if a this value has been generated for a self-recursive call as
226 a result of an arithmetic pass-through jump-function acting on a value in
227 the same lattice function. */
229 bool self_recursion_generated_p ()
231 return self_recursion_generated_level > 0;
235 /* Lattice describing potential values of a formal parameter of a function, or
236 a part of an aggregate. TOP is represented by a lattice with zero values
237 and with contains_variable and bottom flags cleared. BOTTOM is represented
238 by a lattice with the bottom flag set. In that case, values and
239 contains_variable flag should be disregarded. */
241 template <typename valtype>
242 struct ipcp_lattice
244 public:
245 /* The list of known values and types in this lattice. Note that values are
246 not deallocated if a lattice is set to bottom because there may be value
247 sources referencing them. */
248 ipcp_value<valtype> *values;
249 /* Number of known values and types in this lattice. */
250 int values_count;
251 /* The lattice contains a variable component (in addition to values). */
252 bool contains_variable;
253 /* The value of the lattice is bottom (i.e. variable and unusable for any
254 propagation). */
255 bool bottom;
257 inline bool is_single_const ();
258 inline bool set_to_bottom ();
259 inline bool set_contains_variable ();
260 bool add_value (valtype newval, cgraph_edge *cs,
261 ipcp_value<valtype> *src_val = NULL,
262 int src_idx = 0, HOST_WIDE_INT offset = -1,
263 ipcp_value<valtype> **val_p = NULL,
264 unsigned same_lat_gen_level = 0);
265 void print (FILE * f, bool dump_sources, bool dump_benefits);
268 /* Lattice of tree values with an offset to describe a part of an
269 aggregate. */
271 struct ipcp_agg_lattice : public ipcp_lattice<tree>
273 public:
274 /* Offset that is being described by this lattice. */
275 HOST_WIDE_INT offset;
276 /* Size so that we don't have to re-compute it every time we traverse the
277 list. Must correspond to TYPE_SIZE of all lat values. */
278 HOST_WIDE_INT size;
279 /* Next element of the linked list. */
280 struct ipcp_agg_lattice *next;
283 /* Lattice of known bits, only capable of holding one value.
284 Bitwise constant propagation propagates which bits of a
285 value are constant.
286 For eg:
287 int f(int x)
289 return some_op (x);
292 int f1(int y)
294 if (cond)
295 return f (y & 0xff);
296 else
297 return f (y & 0xf);
300 In the above case, the param 'x' will always have all
301 the bits (except the bits in lsb) set to 0.
302 Hence the mask of 'x' would be 0xff. The mask
303 reflects that the bits in lsb are unknown.
304 The actual propagated value is given by m_value & ~m_mask. */
306 class ipcp_bits_lattice
308 public:
309 bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
310 bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
311 bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
312 bool set_to_bottom ();
313 bool set_to_constant (widest_int, widest_int);
315 widest_int get_value () { return m_value; }
316 widest_int get_mask () { return m_mask; }
318 bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
319 enum tree_code, tree);
321 bool meet_with (widest_int, widest_int, unsigned);
323 void print (FILE *);
325 private:
326 enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
328 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
329 If a bit in mask is set to 0, then the corresponding bit in
330 value is known to be constant. */
331 widest_int m_value, m_mask;
333 bool meet_with_1 (widest_int, widest_int, unsigned);
334 void get_value_and_mask (tree, widest_int *, widest_int *);
337 /* Lattice of value ranges. */
339 class ipcp_vr_lattice
341 public:
342 value_range m_vr;
344 inline bool bottom_p () const;
345 inline bool top_p () const;
346 inline bool set_to_bottom ();
347 bool meet_with (const value_range *p_vr);
348 bool meet_with (const ipcp_vr_lattice &other);
349 void init () { gcc_assert (m_vr.undefined_p ()); }
350 void print (FILE * f);
352 private:
353 bool meet_with_1 (const value_range *other_vr);
356 /* Structure containing lattices for a parameter itself and for pieces of
357 aggregates that are passed in the parameter or by a reference in a parameter
358 plus some other useful flags. */
360 class ipcp_param_lattices
362 public:
363 /* Lattice describing the value of the parameter itself. */
364 ipcp_lattice<tree> itself;
365 /* Lattice describing the polymorphic contexts of a parameter. */
366 ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
367 /* Lattices describing aggregate parts. */
368 ipcp_agg_lattice *aggs;
369 /* Lattice describing known bits. */
370 ipcp_bits_lattice bits_lattice;
371 /* Lattice describing value range. */
372 ipcp_vr_lattice m_value_range;
373 /* Number of aggregate lattices */
374 int aggs_count;
375 /* True if aggregate data were passed by reference (as opposed to by
376 value). */
377 bool aggs_by_ref;
378 /* All aggregate lattices contain a variable component (in addition to
379 values). */
380 bool aggs_contain_variable;
381 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
382 for any propagation). */
383 bool aggs_bottom;
385 /* There is a virtual call based on this parameter. */
386 bool virt_call;
389 /* Allocation pools for values and their sources in ipa-cp. */
391 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
392 ("IPA-CP constant values");
394 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
395 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
397 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
398 ("IPA-CP value sources");
400 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
401 ("IPA_CP aggregate lattices");
403 /* Base count to use in heuristics when using profile feedback. */
405 static profile_count base_count;
407 /* Original overall size of the program. */
409 static long overall_size, orig_overall_size;
411 /* Node name to unique clone suffix number map. */
412 static hash_map<const char *, unsigned> *clone_num_suffixes;
414 /* Return the param lattices structure corresponding to the Ith formal
415 parameter of the function described by INFO. */
416 static inline class ipcp_param_lattices *
417 ipa_get_parm_lattices (class ipa_node_params *info, int i)
419 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
420 gcc_checking_assert (!info->ipcp_orig_node);
421 gcc_checking_assert (info->lattices);
422 return &(info->lattices[i]);
425 /* Return the lattice corresponding to the scalar value of the Ith formal
426 parameter of the function described by INFO. */
427 static inline ipcp_lattice<tree> *
428 ipa_get_scalar_lat (class ipa_node_params *info, int i)
430 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
431 return &plats->itself;
434 /* Return the lattice corresponding to the scalar value of the Ith formal
435 parameter of the function described by INFO. */
436 static inline ipcp_lattice<ipa_polymorphic_call_context> *
437 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
439 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
440 return &plats->ctxlat;
443 /* Return whether LAT is a lattice with a single constant and without an
444 undefined value. */
446 template <typename valtype>
447 inline bool
448 ipcp_lattice<valtype>::is_single_const ()
450 if (bottom || contains_variable || values_count != 1)
451 return false;
452 else
453 return true;
456 /* Print V which is extracted from a value in a lattice to F. */
458 static void
459 print_ipcp_constant_value (FILE * f, tree v)
461 if (TREE_CODE (v) == ADDR_EXPR
462 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
464 fprintf (f, "& ");
465 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
467 else
468 print_generic_expr (f, v);
471 /* Print V which is extracted from a value in a lattice to F. */
473 static void
474 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
476 v.dump(f, false);
479 /* Print a lattice LAT to F. */
481 template <typename valtype>
482 void
483 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
485 ipcp_value<valtype> *val;
486 bool prev = false;
488 if (bottom)
490 fprintf (f, "BOTTOM\n");
491 return;
494 if (!values_count && !contains_variable)
496 fprintf (f, "TOP\n");
497 return;
500 if (contains_variable)
502 fprintf (f, "VARIABLE");
503 prev = true;
504 if (dump_benefits)
505 fprintf (f, "\n");
508 for (val = values; val; val = val->next)
510 if (dump_benefits && prev)
511 fprintf (f, " ");
512 else if (!dump_benefits && prev)
513 fprintf (f, ", ");
514 else
515 prev = true;
517 print_ipcp_constant_value (f, val->value);
519 if (dump_sources)
521 ipcp_value_source<valtype> *s;
523 if (val->self_recursion_generated_p ())
524 fprintf (f, " [self_gen(%i), from:",
525 val->self_recursion_generated_level);
526 else
527 fprintf (f, " [scc: %i, from:", val->scc_no);
528 for (s = val->sources; s; s = s->next)
529 fprintf (f, " %i(%f)", s->cs->caller->order,
530 s->cs->sreal_frequency ().to_double ());
531 fprintf (f, "]");
534 if (dump_benefits)
535 fprintf (f, " [loc_time: %g, loc_size: %i, "
536 "prop_time: %g, prop_size: %i]\n",
537 val->local_time_benefit.to_double (), val->local_size_cost,
538 val->prop_time_benefit.to_double (), val->prop_size_cost);
540 if (!dump_benefits)
541 fprintf (f, "\n");
544 void
545 ipcp_bits_lattice::print (FILE *f)
547 if (top_p ())
548 fprintf (f, " Bits unknown (TOP)\n");
549 else if (bottom_p ())
550 fprintf (f, " Bits unusable (BOTTOM)\n");
551 else
553 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
554 fprintf (f, ", mask = "); print_hex (get_mask (), f);
555 fprintf (f, "\n");
559 /* Print value range lattice to F. */
561 void
562 ipcp_vr_lattice::print (FILE * f)
564 dump_value_range (f, &m_vr);
567 /* Print all ipcp_lattices of all functions to F. */
569 static void
570 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
572 struct cgraph_node *node;
573 int i, count;
575 fprintf (f, "\nLattices:\n");
576 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
578 class ipa_node_params *info;
580 info = ipa_node_params_sum->get (node);
581 /* Skip unoptimized functions and constprop clones since we don't make
582 lattices for them. */
583 if (!info || info->ipcp_orig_node)
584 continue;
585 fprintf (f, " Node: %s:\n", node->dump_name ());
586 count = ipa_get_param_count (info);
587 for (i = 0; i < count; i++)
589 struct ipcp_agg_lattice *aglat;
590 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
591 fprintf (f, " param [%d]: ", i);
592 plats->itself.print (f, dump_sources, dump_benefits);
593 fprintf (f, " ctxs: ");
594 plats->ctxlat.print (f, dump_sources, dump_benefits);
595 plats->bits_lattice.print (f);
596 fprintf (f, " ");
597 plats->m_value_range.print (f);
598 fprintf (f, "\n");
599 if (plats->virt_call)
600 fprintf (f, " virt_call flag set\n");
602 if (plats->aggs_bottom)
604 fprintf (f, " AGGS BOTTOM\n");
605 continue;
607 if (plats->aggs_contain_variable)
608 fprintf (f, " AGGS VARIABLE\n");
609 for (aglat = plats->aggs; aglat; aglat = aglat->next)
611 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
612 plats->aggs_by_ref ? "ref " : "", aglat->offset);
613 aglat->print (f, dump_sources, dump_benefits);
619 /* Determine whether it is at all technically possible to create clones of NODE
620 and store this information in the ipa_node_params structure associated
621 with NODE. */
623 static void
624 determine_versionability (struct cgraph_node *node,
625 class ipa_node_params *info)
627 const char *reason = NULL;
629 /* There are a number of generic reasons functions cannot be versioned. We
630 also cannot remove parameters if there are type attributes such as fnspec
631 present. */
632 if (node->alias || node->thunk)
633 reason = "alias or thunk";
634 else if (!node->versionable)
635 reason = "not a tree_versionable_function";
636 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
637 reason = "insufficient body availability";
638 else if (!opt_for_fn (node->decl, optimize)
639 || !opt_for_fn (node->decl, flag_ipa_cp))
640 reason = "non-optimized function";
641 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
643 /* Ideally we should clone the SIMD clones themselves and create
644 vector copies of them, so IPA-cp and SIMD clones can happily
645 coexist, but that may not be worth the effort. */
646 reason = "function has SIMD clones";
648 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
650 /* Ideally we should clone the target clones themselves and create
651 copies of them, so IPA-cp and target clones can happily
652 coexist, but that may not be worth the effort. */
653 reason = "function target_clones attribute";
655 /* Don't clone decls local to a comdat group; it breaks and for C++
656 decloned constructors, inlining is always better anyway. */
657 else if (node->comdat_local_p ())
658 reason = "comdat-local function";
659 else if (node->calls_comdat_local)
661 /* TODO: call is versionable if we make sure that all
662 callers are inside of a comdat group. */
663 reason = "calls comdat-local function";
666 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
667 work only when inlined. Cloning them may still lead to better code
668 because ipa-cp will not give up on cloning further. If the function is
669 external this however leads to wrong code because we may end up producing
670 offline copy of the function. */
671 if (DECL_EXTERNAL (node->decl))
672 for (cgraph_edge *edge = node->callees; !reason && edge;
673 edge = edge->next_callee)
674 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
676 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
677 reason = "external function which calls va_arg_pack";
678 if (DECL_FUNCTION_CODE (edge->callee->decl)
679 == BUILT_IN_VA_ARG_PACK_LEN)
680 reason = "external function which calls va_arg_pack_len";
683 if (reason && dump_file && !node->alias && !node->thunk)
684 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
685 node->dump_name (), reason);
687 info->versionable = (reason == NULL);
690 /* Return true if it is at all technically possible to create clones of a
691 NODE. */
693 static bool
694 ipcp_versionable_function_p (struct cgraph_node *node)
696 ipa_node_params *info = ipa_node_params_sum->get (node);
697 return info && info->versionable;
700 /* Structure holding accumulated information about callers of a node. */
702 struct caller_statistics
704 /* If requested (see below), self-recursive call counts are summed into this
705 field. */
706 profile_count rec_count_sum;
707 /* The sum of all ipa counts of all the other (non-recursive) calls. */
708 profile_count count_sum;
709 /* Sum of all frequencies for all calls. */
710 sreal freq_sum;
711 /* Number of calls and hot calls respectively. */
712 int n_calls, n_hot_calls;
713 /* If itself is set up, also count the number of non-self-recursive
714 calls. */
715 int n_nonrec_calls;
716 /* If non-NULL, this is the node itself and calls from it should have their
717 counts included in rec_count_sum and not count_sum. */
718 cgraph_node *itself;
721 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
722 from IGNORED_CALLER are not counted. */
724 static inline void
725 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
727 stats->rec_count_sum = profile_count::zero ();
728 stats->count_sum = profile_count::zero ();
729 stats->n_calls = 0;
730 stats->n_hot_calls = 0;
731 stats->n_nonrec_calls = 0;
732 stats->freq_sum = 0;
733 stats->itself = itself;
736 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
737 non-thunk incoming edges to NODE. */
739 static bool
740 gather_caller_stats (struct cgraph_node *node, void *data)
742 struct caller_statistics *stats = (struct caller_statistics *) data;
743 struct cgraph_edge *cs;
745 for (cs = node->callers; cs; cs = cs->next_caller)
746 if (!cs->caller->thunk)
748 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
749 if (info && info->node_dead)
750 continue;
752 if (cs->count.ipa ().initialized_p ())
754 if (stats->itself && stats->itself == cs->caller)
755 stats->rec_count_sum += cs->count.ipa ();
756 else
757 stats->count_sum += cs->count.ipa ();
759 stats->freq_sum += cs->sreal_frequency ();
760 stats->n_calls++;
761 if (stats->itself && stats->itself != cs->caller)
762 stats->n_nonrec_calls++;
764 if (cs->maybe_hot_p ())
765 stats->n_hot_calls ++;
767 return false;
771 /* Return true if this NODE is viable candidate for cloning. */
773 static bool
774 ipcp_cloning_candidate_p (struct cgraph_node *node)
776 struct caller_statistics stats;
778 gcc_checking_assert (node->has_gimple_body_p ());
780 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
782 if (dump_file)
783 fprintf (dump_file, "Not considering %s for cloning; "
784 "-fipa-cp-clone disabled.\n",
785 node->dump_name ());
786 return false;
789 if (node->optimize_for_size_p ())
791 if (dump_file)
792 fprintf (dump_file, "Not considering %s for cloning; "
793 "optimizing it for size.\n",
794 node->dump_name ());
795 return false;
798 init_caller_stats (&stats);
799 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
801 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
803 if (dump_file)
804 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
805 node->dump_name ());
806 return true;
809 /* When profile is available and function is hot, propagate into it even if
810 calls seems cold; constant propagation can improve function's speed
811 significantly. */
812 if (stats.count_sum > profile_count::zero ()
813 && node->count.ipa ().initialized_p ())
815 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
817 if (dump_file)
818 fprintf (dump_file, "Considering %s for cloning; "
819 "usually called directly.\n",
820 node->dump_name ());
821 return true;
824 if (!stats.n_hot_calls)
826 if (dump_file)
827 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
828 node->dump_name ());
829 return false;
831 if (dump_file)
832 fprintf (dump_file, "Considering %s for cloning.\n",
833 node->dump_name ());
834 return true;
837 template <typename valtype>
838 class value_topo_info
840 public:
841 /* Head of the linked list of topologically sorted values. */
842 ipcp_value<valtype> *values_topo;
843 /* Stack for creating SCCs, represented by a linked list too. */
844 ipcp_value<valtype> *stack;
845 /* Counter driving the algorithm in add_val_to_toposort. */
846 int dfs_counter;
848 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
850 void add_val (ipcp_value<valtype> *cur_val);
851 void propagate_effects ();
854 /* Arrays representing a topological ordering of call graph nodes and a stack
855 of nodes used during constant propagation and also data required to perform
856 topological sort of values and propagation of benefits in the determined
857 order. */
859 class ipa_topo_info
861 public:
862 /* Array with obtained topological order of cgraph nodes. */
863 struct cgraph_node **order;
864 /* Stack of cgraph nodes used during propagation within SCC until all values
865 in the SCC stabilize. */
866 struct cgraph_node **stack;
867 int nnodes, stack_top;
869 value_topo_info<tree> constants;
870 value_topo_info<ipa_polymorphic_call_context> contexts;
872 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
873 constants ()
877 /* Skip edges from and to nodes without ipa_cp enabled.
878 Ignore not available symbols. */
880 static bool
881 ignore_edge_p (cgraph_edge *e)
883 enum availability avail;
884 cgraph_node *ultimate_target
885 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
887 return (avail <= AVAIL_INTERPOSABLE
888 || !opt_for_fn (ultimate_target->decl, optimize)
889 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
892 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
894 static void
895 build_toporder_info (class ipa_topo_info *topo)
897 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
898 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
900 gcc_checking_assert (topo->stack_top == 0);
901 topo->nnodes = ipa_reduced_postorder (topo->order, true,
902 ignore_edge_p);
905 /* Free information about strongly connected components and the arrays in
906 TOPO. */
908 static void
909 free_toporder_info (class ipa_topo_info *topo)
911 ipa_free_postorder_info ();
912 free (topo->order);
913 free (topo->stack);
916 /* Add NODE to the stack in TOPO, unless it is already there. */
918 static inline void
919 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
921 ipa_node_params *info = ipa_node_params_sum->get (node);
922 if (info->node_enqueued)
923 return;
924 info->node_enqueued = 1;
925 topo->stack[topo->stack_top++] = node;
928 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
929 is empty. */
931 static struct cgraph_node *
932 pop_node_from_stack (class ipa_topo_info *topo)
934 if (topo->stack_top)
936 struct cgraph_node *node;
937 topo->stack_top--;
938 node = topo->stack[topo->stack_top];
939 ipa_node_params_sum->get (node)->node_enqueued = 0;
940 return node;
942 else
943 return NULL;
946 /* Set lattice LAT to bottom and return true if it previously was not set as
947 such. */
949 template <typename valtype>
950 inline bool
951 ipcp_lattice<valtype>::set_to_bottom ()
953 bool ret = !bottom;
954 bottom = true;
955 return ret;
958 /* Mark lattice as containing an unknown value and return true if it previously
959 was not marked as such. */
961 template <typename valtype>
962 inline bool
963 ipcp_lattice<valtype>::set_contains_variable ()
965 bool ret = !contains_variable;
966 contains_variable = true;
967 return ret;
970 /* Set all aggregate lattices in PLATS to bottom and return true if they were
971 not previously set as such. */
973 static inline bool
974 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
976 bool ret = !plats->aggs_bottom;
977 plats->aggs_bottom = true;
978 return ret;
981 /* Mark all aggregate lattices in PLATS as containing an unknown value and
982 return true if they were not previously marked as such. */
984 static inline bool
985 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
987 bool ret = !plats->aggs_contain_variable;
988 plats->aggs_contain_variable = true;
989 return ret;
992 bool
993 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
995 return meet_with_1 (&other.m_vr);
998 /* Meet the current value of the lattice with value range described by VR
999 lattice. */
1001 bool
1002 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1004 return meet_with_1 (p_vr);
1007 /* Meet the current value of the lattice with value range described by
1008 OTHER_VR lattice. Return TRUE if anything changed. */
1010 bool
1011 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1013 if (bottom_p ())
1014 return false;
1016 if (other_vr->varying_p ())
1017 return set_to_bottom ();
1019 value_range save (m_vr);
1020 m_vr.union_ (other_vr);
1021 return !m_vr.equal_p (save);
1024 /* Return true if value range information in the lattice is yet unknown. */
1026 bool
1027 ipcp_vr_lattice::top_p () const
1029 return m_vr.undefined_p ();
1032 /* Return true if value range information in the lattice is known to be
1033 unusable. */
1035 bool
1036 ipcp_vr_lattice::bottom_p () const
1038 return m_vr.varying_p ();
1041 /* Set value range information in the lattice to bottom. Return true if it
1042 previously was in a different state. */
1044 bool
1045 ipcp_vr_lattice::set_to_bottom ()
1047 if (m_vr.varying_p ())
1048 return false;
1049 /* ?? We create all sorts of VARYING ranges for floats, structures,
1050 and other types which we cannot handle as ranges. We should
1051 probably avoid handling them throughout the pass, but it's easier
1052 to create a sensible VARYING here and let the lattice
1053 propagate. */
1054 m_vr.set_varying (integer_type_node);
1055 return true;
1058 /* Set lattice value to bottom, if it already isn't the case. */
1060 bool
1061 ipcp_bits_lattice::set_to_bottom ()
1063 if (bottom_p ())
1064 return false;
1065 m_lattice_val = IPA_BITS_VARYING;
1066 m_value = 0;
1067 m_mask = -1;
1068 return true;
1071 /* Set to constant if it isn't already. Only meant to be called
1072 when switching state from TOP. */
1074 bool
1075 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1077 gcc_assert (top_p ());
1078 m_lattice_val = IPA_BITS_CONSTANT;
1079 m_value = wi::bit_and (wi::bit_not (mask), value);
1080 m_mask = mask;
1081 return true;
1084 /* Convert operand to value, mask form. */
1086 void
1087 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1089 wide_int get_nonzero_bits (const_tree);
1091 if (TREE_CODE (operand) == INTEGER_CST)
1093 *valuep = wi::to_widest (operand);
1094 *maskp = 0;
1096 else
1098 *valuep = 0;
1099 *maskp = -1;
1103 /* Meet operation, similar to ccp_lattice_meet, we xor values
1104 if this->value, value have different values at same bit positions, we want
1105 to drop that bit to varying. Return true if mask is changed.
1106 This function assumes that the lattice value is in CONSTANT state */
1108 bool
1109 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1110 unsigned precision)
1112 gcc_assert (constant_p ());
1114 widest_int old_mask = m_mask;
1115 m_mask = (m_mask | mask) | (m_value ^ value);
1116 m_value &= ~m_mask;
1118 if (wi::sext (m_mask, precision) == -1)
1119 return set_to_bottom ();
1121 return m_mask != old_mask;
1124 /* Meet the bits lattice with operand
1125 described by <value, mask, sgn, precision. */
1127 bool
1128 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1129 unsigned precision)
1131 if (bottom_p ())
1132 return false;
1134 if (top_p ())
1136 if (wi::sext (mask, precision) == -1)
1137 return set_to_bottom ();
1138 return set_to_constant (value, mask);
1141 return meet_with_1 (value, mask, precision);
1144 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1145 if code is binary operation or bit_value_unop (other) if code is unary op.
1146 In the case when code is nop_expr, no adjustment is required. */
1148 bool
1149 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1150 signop sgn, enum tree_code code, tree operand)
1152 if (other.bottom_p ())
1153 return set_to_bottom ();
1155 if (bottom_p () || other.top_p ())
1156 return false;
1158 widest_int adjusted_value, adjusted_mask;
1160 if (TREE_CODE_CLASS (code) == tcc_binary)
1162 tree type = TREE_TYPE (operand);
1163 widest_int o_value, o_mask;
1164 get_value_and_mask (operand, &o_value, &o_mask);
1166 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1167 sgn, precision, other.get_value (), other.get_mask (),
1168 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1170 if (wi::sext (adjusted_mask, precision) == -1)
1171 return set_to_bottom ();
1174 else if (TREE_CODE_CLASS (code) == tcc_unary)
1176 bit_value_unop (code, sgn, precision, &adjusted_value,
1177 &adjusted_mask, sgn, precision, other.get_value (),
1178 other.get_mask ());
1180 if (wi::sext (adjusted_mask, precision) == -1)
1181 return set_to_bottom ();
1184 else
1185 return set_to_bottom ();
1187 if (top_p ())
1189 if (wi::sext (adjusted_mask, precision) == -1)
1190 return set_to_bottom ();
1191 return set_to_constant (adjusted_value, adjusted_mask);
1193 else
1194 return meet_with_1 (adjusted_value, adjusted_mask, precision);
1197 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1198 return true is any of them has not been marked as such so far. */
1200 static inline bool
1201 set_all_contains_variable (class ipcp_param_lattices *plats)
1203 bool ret;
1204 ret = plats->itself.set_contains_variable ();
1205 ret |= plats->ctxlat.set_contains_variable ();
1206 ret |= set_agg_lats_contain_variable (plats);
1207 ret |= plats->bits_lattice.set_to_bottom ();
1208 ret |= plats->m_value_range.set_to_bottom ();
1209 return ret;
1212 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1213 points to by the number of callers to NODE. */
1215 static bool
1216 count_callers (cgraph_node *node, void *data)
1218 int *caller_count = (int *) data;
1220 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1221 /* Local thunks can be handled transparently, but if the thunk cannot
1222 be optimized out, count it as a real use. */
1223 if (!cs->caller->thunk || !cs->caller->local)
1224 ++*caller_count;
1225 return false;
1228 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1229 the one caller of some other node. Set the caller's corresponding flag. */
1231 static bool
1232 set_single_call_flag (cgraph_node *node, void *)
1234 cgraph_edge *cs = node->callers;
1235 /* Local thunks can be handled transparently, skip them. */
1236 while (cs && cs->caller->thunk && cs->caller->local)
1237 cs = cs->next_caller;
1238 if (cs)
1239 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1241 info->node_calling_single_call = true;
1242 return true;
1244 return false;
1247 /* Initialize ipcp_lattices. */
1249 static void
1250 initialize_node_lattices (struct cgraph_node *node)
1252 ipa_node_params *info = ipa_node_params_sum->get (node);
1253 struct cgraph_edge *ie;
1254 bool disable = false, variable = false;
1255 int i;
1257 gcc_checking_assert (node->has_gimple_body_p ());
1259 if (!ipa_get_param_count (info))
1260 disable = true;
1261 else if (node->local)
1263 int caller_count = 0;
1264 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1265 true);
1266 gcc_checking_assert (caller_count > 0);
1267 if (caller_count == 1)
1268 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1269 NULL, true);
1271 else
1273 /* When cloning is allowed, we can assume that externally visible
1274 functions are not called. We will compensate this by cloning
1275 later. */
1276 if (ipcp_versionable_function_p (node)
1277 && ipcp_cloning_candidate_p (node))
1278 variable = true;
1279 else
1280 disable = true;
1283 if (dump_file && (dump_flags & TDF_DETAILS)
1284 && !node->alias && !node->thunk)
1286 fprintf (dump_file, "Initializing lattices of %s\n",
1287 node->dump_name ());
1288 if (disable || variable)
1289 fprintf (dump_file, " Marking all lattices as %s\n",
1290 disable ? "BOTTOM" : "VARIABLE");
1293 auto_vec<bool, 16> surviving_params;
1294 bool pre_modified = false;
1296 clone_info *cinfo = clone_info::get (node);
1298 if (!disable && cinfo && cinfo->param_adjustments)
1300 /* At the moment all IPA optimizations should use the number of
1301 parameters of the prevailing decl as the m_always_copy_start.
1302 Handling any other value would complicate the code below, so for the
1303 time bing let's only assert it is so. */
1304 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1305 == ipa_get_param_count (info))
1306 || cinfo->param_adjustments->m_always_copy_start < 0);
1308 pre_modified = true;
1309 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1311 if (dump_file && (dump_flags & TDF_DETAILS)
1312 && !node->alias && !node->thunk)
1314 bool first = true;
1315 for (int j = 0; j < ipa_get_param_count (info); j++)
1317 if (j < (int) surviving_params.length ()
1318 && surviving_params[j])
1319 continue;
1320 if (first)
1322 fprintf (dump_file,
1323 " The following parameters are dead on arrival:");
1324 first = false;
1326 fprintf (dump_file, " %u", j);
1328 if (!first)
1329 fprintf (dump_file, "\n");
1333 for (i = 0; i < ipa_get_param_count (info); i++)
1335 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1336 if (disable
1337 || !ipa_get_type (info, i)
1338 || (pre_modified && (surviving_params.length () <= (unsigned) i
1339 || !surviving_params[i])))
1341 plats->itself.set_to_bottom ();
1342 plats->ctxlat.set_to_bottom ();
1343 set_agg_lats_to_bottom (plats);
1344 plats->bits_lattice.set_to_bottom ();
1345 plats->m_value_range.m_vr = value_range ();
1346 plats->m_value_range.set_to_bottom ();
1348 else
1350 plats->m_value_range.init ();
1351 if (variable)
1352 set_all_contains_variable (plats);
1356 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1357 if (ie->indirect_info->polymorphic
1358 && ie->indirect_info->param_index >= 0)
1360 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1361 ipa_get_parm_lattices (info,
1362 ie->indirect_info->param_index)->virt_call = 1;
1366 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1367 PARAM_TYPE. */
1369 static bool
1370 ipacp_value_safe_for_type (tree param_type, tree value)
1372 tree val_type = TREE_TYPE (value);
1373 if (param_type == val_type
1374 || useless_type_conversion_p (param_type, val_type)
1375 || fold_convertible_p (param_type, value))
1376 return true;
1377 else
1378 return false;
1381 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1383 static bool
1384 values_equal_for_ipcp_p (tree x, tree y)
1386 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1388 if (x == y)
1389 return true;
1391 if (TREE_CODE (x) == ADDR_EXPR
1392 && TREE_CODE (y) == ADDR_EXPR
1393 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1394 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1395 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1396 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1397 else
1398 return operand_equal_p (x, y, 0);
1401 /* Return the result of a (possibly arithmetic) operation on the constant
1402 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1403 the type of the parameter to which the result is passed. Return
1404 NULL_TREE if that cannot be determined or be considered an
1405 interprocedural invariant. */
1407 static tree
1408 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1409 tree res_type)
1411 tree res;
1413 if (opcode == NOP_EXPR)
1414 return input;
1415 if (!is_gimple_ip_invariant (input))
1416 return NULL_TREE;
1418 if (opcode == ASSERT_EXPR)
1420 if (values_equal_for_ipcp_p (input, operand))
1421 return input;
1422 else
1423 return NULL_TREE;
1426 if (!res_type)
1428 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1429 res_type = boolean_type_node;
1430 else if (expr_type_first_operand_type_p (opcode))
1431 res_type = TREE_TYPE (input);
1432 else
1433 return NULL_TREE;
1436 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1437 res = fold_unary (opcode, res_type, input);
1438 else
1439 res = fold_binary (opcode, res_type, input, operand);
1441 if (res && !is_gimple_ip_invariant (res))
1442 return NULL_TREE;
1444 return res;
1447 /* Return the result of a (possibly arithmetic) pass through jump function
1448 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1449 to which the result is passed. Return NULL_TREE if that cannot be
1450 determined or be considered an interprocedural invariant. */
1452 static tree
1453 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1454 tree res_type)
1456 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1457 input,
1458 ipa_get_jf_pass_through_operand (jfunc),
1459 res_type);
1462 /* Return the result of an ancestor jump function JFUNC on the constant value
1463 INPUT. Return NULL_TREE if that cannot be determined. */
1465 static tree
1466 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1468 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1469 if (TREE_CODE (input) == ADDR_EXPR)
1471 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1472 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1473 if (known_eq (off, 0))
1474 return input;
1475 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1476 return build1 (ADDR_EXPR, TREE_TYPE (input),
1477 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1478 build_int_cst (ptr_type_node, byte_offset)));
1480 else
1481 return NULL_TREE;
1484 /* Determine whether JFUNC evaluates to a single known constant value and if
1485 so, return it. Otherwise return NULL. INFO describes the caller node or
1486 the one it is inlined to, so that pass-through jump functions can be
1487 evaluated. PARM_TYPE is the type of the parameter to which the result is
1488 passed. */
1490 tree
1491 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1492 tree parm_type)
1494 if (jfunc->type == IPA_JF_CONST)
1495 return ipa_get_jf_constant (jfunc);
1496 else if (jfunc->type == IPA_JF_PASS_THROUGH
1497 || jfunc->type == IPA_JF_ANCESTOR)
1499 tree input;
1500 int idx;
1502 if (jfunc->type == IPA_JF_PASS_THROUGH)
1503 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1504 else
1505 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1507 if (info->ipcp_orig_node)
1508 input = info->known_csts[idx];
1509 else
1511 ipcp_lattice<tree> *lat;
1513 if (!info->lattices
1514 || idx >= ipa_get_param_count (info))
1515 return NULL_TREE;
1516 lat = ipa_get_scalar_lat (info, idx);
1517 if (!lat->is_single_const ())
1518 return NULL_TREE;
1519 input = lat->values->value;
1522 if (!input)
1523 return NULL_TREE;
1525 if (jfunc->type == IPA_JF_PASS_THROUGH)
1526 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1527 else
1528 return ipa_get_jf_ancestor_result (jfunc, input);
1530 else
1531 return NULL_TREE;
1534 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1535 that INFO describes the caller node or the one it is inlined to, CS is the
1536 call graph edge corresponding to JFUNC and CSIDX index of the described
1537 parameter. */
1539 ipa_polymorphic_call_context
1540 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1541 ipa_jump_func *jfunc)
1543 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1544 ipa_polymorphic_call_context ctx;
1545 ipa_polymorphic_call_context *edge_ctx
1546 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1548 if (edge_ctx && !edge_ctx->useless_p ())
1549 ctx = *edge_ctx;
1551 if (jfunc->type == IPA_JF_PASS_THROUGH
1552 || jfunc->type == IPA_JF_ANCESTOR)
1554 ipa_polymorphic_call_context srcctx;
1555 int srcidx;
1556 bool type_preserved = true;
1557 if (jfunc->type == IPA_JF_PASS_THROUGH)
1559 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1560 return ctx;
1561 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1562 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1564 else
1566 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1567 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1569 if (info->ipcp_orig_node)
1571 if (info->known_contexts.exists ())
1572 srcctx = info->known_contexts[srcidx];
1574 else
1576 if (!info->lattices
1577 || srcidx >= ipa_get_param_count (info))
1578 return ctx;
1579 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1580 lat = ipa_get_poly_ctx_lat (info, srcidx);
1581 if (!lat->is_single_const ())
1582 return ctx;
1583 srcctx = lat->values->value;
1585 if (srcctx.useless_p ())
1586 return ctx;
1587 if (jfunc->type == IPA_JF_ANCESTOR)
1588 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1589 if (!type_preserved)
1590 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1591 srcctx.combine_with (ctx);
1592 return srcctx;
1595 return ctx;
1598 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1599 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1600 the result is a range or an anti-range. */
1602 static bool
1603 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1604 value_range *src_vr,
1605 enum tree_code operation,
1606 tree dst_type, tree src_type)
1608 range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1609 if (dst_vr->varying_p () || dst_vr->undefined_p ())
1610 return false;
1611 return true;
1614 /* Determine value_range of JFUNC given that INFO describes the caller node or
1615 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1616 and PARM_TYPE of the parameter. */
1618 value_range
1619 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1620 ipa_jump_func *jfunc, tree parm_type)
1622 value_range vr;
1623 return vr;
1624 if (jfunc->m_vr)
1625 ipa_vr_operation_and_type_effects (&vr,
1626 jfunc->m_vr,
1627 NOP_EXPR, parm_type,
1628 jfunc->m_vr->type ());
1629 if (vr.singleton_p ())
1630 return vr;
1631 if (jfunc->type == IPA_JF_PASS_THROUGH)
1633 int idx;
1634 ipcp_transformation *sum
1635 = ipcp_get_transformation_summary (cs->caller->inlined_to
1636 ? cs->caller->inlined_to
1637 : cs->caller);
1638 if (!sum || !sum->m_vr)
1639 return vr;
1641 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1643 if (!(*sum->m_vr)[idx].known)
1644 return vr;
1645 tree vr_type = ipa_get_type (info, idx);
1646 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1647 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1648 (*sum->m_vr)[idx].type);
1650 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1652 if (TREE_CODE_CLASS (operation) == tcc_unary)
1654 value_range res;
1656 if (ipa_vr_operation_and_type_effects (&res,
1657 &srcvr,
1658 operation, parm_type,
1659 vr_type))
1660 vr.intersect (res);
1662 else
1664 value_range op_res, res;
1665 tree op = ipa_get_jf_pass_through_operand (jfunc);
1666 value_range op_vr (op, op);
1668 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1669 if (ipa_vr_operation_and_type_effects (&res,
1670 &op_res,
1671 NOP_EXPR, parm_type,
1672 vr_type))
1673 vr.intersect (res);
1676 return vr;
1679 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1680 parameter with the given INDEX. */
1682 static tree
1683 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1684 int index)
1686 struct ipa_agg_replacement_value *aggval;
1688 aggval = ipa_get_agg_replacements_for_node (node);
1689 while (aggval)
1691 if (aggval->offset == offset
1692 && aggval->index == index)
1693 return aggval->value;
1694 aggval = aggval->next;
1696 return NULL_TREE;
1699 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1700 single known constant value and if so, return it. Otherwise return NULL.
1701 NODE and INFO describes the caller node or the one it is inlined to, and
1702 its related info. */
1704 static tree
1705 ipa_agg_value_from_node (class ipa_node_params *info,
1706 struct cgraph_node *node,
1707 struct ipa_agg_jf_item *item)
1709 tree value = NULL_TREE;
1710 int src_idx;
1712 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1713 return NULL_TREE;
1715 if (item->jftype == IPA_JF_CONST)
1716 return item->value.constant;
1718 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1719 || item->jftype == IPA_JF_LOAD_AGG);
1721 src_idx = item->value.pass_through.formal_id;
1723 if (info->ipcp_orig_node)
1725 if (item->jftype == IPA_JF_PASS_THROUGH)
1726 value = info->known_csts[src_idx];
1727 else
1728 value = get_clone_agg_value (node, item->value.load_agg.offset,
1729 src_idx);
1731 else if (info->lattices)
1733 class ipcp_param_lattices *src_plats
1734 = ipa_get_parm_lattices (info, src_idx);
1736 if (item->jftype == IPA_JF_PASS_THROUGH)
1738 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1740 if (!lat->is_single_const ())
1741 return NULL_TREE;
1743 value = lat->values->value;
1745 else if (src_plats->aggs
1746 && !src_plats->aggs_bottom
1747 && !src_plats->aggs_contain_variable
1748 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1750 struct ipcp_agg_lattice *aglat;
1752 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1754 if (aglat->offset > item->value.load_agg.offset)
1755 break;
1757 if (aglat->offset == item->value.load_agg.offset)
1759 if (aglat->is_single_const ())
1760 value = aglat->values->value;
1761 break;
1767 if (!value)
1768 return NULL_TREE;
1770 if (item->jftype == IPA_JF_LOAD_AGG)
1772 tree load_type = item->value.load_agg.type;
1773 tree value_type = TREE_TYPE (value);
1775 /* Ensure value type is compatible with load type. */
1776 if (!useless_type_conversion_p (load_type, value_type))
1777 return NULL_TREE;
1780 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1781 value,
1782 item->value.pass_through.operand,
1783 item->type);
1786 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1787 an aggregate and if so, return it. Otherwise return an empty set. NODE
1788 and INFO describes the caller node or the one it is inlined to, and its
1789 related info. */
1791 struct ipa_agg_value_set
1792 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1793 struct ipa_agg_jump_function *agg_jfunc)
1795 struct ipa_agg_value_set agg;
1796 struct ipa_agg_jf_item *item;
1797 int i;
1799 agg.items = vNULL;
1800 agg.by_ref = agg_jfunc->by_ref;
1802 FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1804 tree value = ipa_agg_value_from_node (info, node, item);
1806 if (value)
1808 struct ipa_agg_value value_item;
1810 value_item.offset = item->offset;
1811 value_item.value = value;
1813 agg.items.safe_push (value_item);
1816 return agg;
1819 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1820 bottom, not containing a variable component and without any known value at
1821 the same time. */
1823 DEBUG_FUNCTION void
1824 ipcp_verify_propagated_values (void)
1826 struct cgraph_node *node;
1828 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1830 ipa_node_params *info = ipa_node_params_sum->get (node);
1831 if (!opt_for_fn (node->decl, flag_ipa_cp)
1832 || !opt_for_fn (node->decl, optimize))
1833 continue;
1834 int i, count = ipa_get_param_count (info);
1836 for (i = 0; i < count; i++)
1838 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1840 if (!lat->bottom
1841 && !lat->contains_variable
1842 && lat->values_count == 0)
1844 if (dump_file)
1846 symtab->dump (dump_file);
1847 fprintf (dump_file, "\nIPA lattices after constant "
1848 "propagation, before gcc_unreachable:\n");
1849 print_all_lattices (dump_file, true, false);
1852 gcc_unreachable ();
1858 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1860 static bool
1861 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1862 ipa_polymorphic_call_context y)
1864 return x.equal_to (y);
1868 /* Add a new value source to the value represented by THIS, marking that a
1869 value comes from edge CS and (if the underlying jump function is a
1870 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1871 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1872 scalar value of the parameter itself or the offset within an aggregate. */
1874 template <typename valtype>
1875 void
1876 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1877 int src_idx, HOST_WIDE_INT offset)
1879 ipcp_value_source<valtype> *src;
1881 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1882 src->offset = offset;
1883 src->cs = cs;
1884 src->val = src_val;
1885 src->index = src_idx;
1887 src->next = sources;
1888 sources = src;
1891 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1892 SOURCE and clear all other fields. */
1894 static ipcp_value<tree> *
1895 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1897 ipcp_value<tree> *val;
1899 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1900 val->value = cst;
1901 val->self_recursion_generated_level = same_lat_gen_level;
1902 return val;
1905 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1906 value to SOURCE and clear all other fields. */
1908 static ipcp_value<ipa_polymorphic_call_context> *
1909 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1910 unsigned same_lat_gen_level)
1912 ipcp_value<ipa_polymorphic_call_context> *val;
1914 val = new (ipcp_poly_ctx_values_pool.allocate ())
1915 ipcp_value<ipa_polymorphic_call_context>();
1916 val->value = ctx;
1917 val->self_recursion_generated_level = same_lat_gen_level;
1918 return val;
1921 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1922 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1923 meaning. OFFSET -1 means the source is scalar and not a part of an
1924 aggregate. If non-NULL, VAL_P records address of existing or newly added
1925 ipcp_value.
1927 If the value is generated for a self-recursive call as a result of an
1928 arithmetic pass-through jump-function acting on a value in the same lattice,
1929 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1930 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1932 template <typename valtype>
1933 bool
1934 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1935 ipcp_value<valtype> *src_val,
1936 int src_idx, HOST_WIDE_INT offset,
1937 ipcp_value<valtype> **val_p,
1938 unsigned same_lat_gen_level)
1940 ipcp_value<valtype> *val, *last_val = NULL;
1942 if (val_p)
1943 *val_p = NULL;
1945 if (bottom)
1946 return false;
1948 for (val = values; val; last_val = val, val = val->next)
1949 if (values_equal_for_ipcp_p (val->value, newval))
1951 if (val_p)
1952 *val_p = val;
1954 if (val->self_recursion_generated_level < same_lat_gen_level)
1955 val->self_recursion_generated_level = same_lat_gen_level;
1957 if (ipa_edge_within_scc (cs))
1959 ipcp_value_source<valtype> *s;
1960 for (s = val->sources; s; s = s->next)
1961 if (s->cs == cs && s->val == src_val)
1962 break;
1963 if (s)
1964 return false;
1967 val->add_source (cs, src_val, src_idx, offset);
1968 return false;
1971 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1972 param_ipa_cp_value_list_size))
1974 /* We can only free sources, not the values themselves, because sources
1975 of other values in this SCC might point to them. */
1976 for (val = values; val; val = val->next)
1978 while (val->sources)
1980 ipcp_value_source<valtype> *src = val->sources;
1981 val->sources = src->next;
1982 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
1985 values = NULL;
1986 return set_to_bottom ();
1989 values_count++;
1990 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
1991 val->add_source (cs, src_val, src_idx, offset);
1992 val->next = NULL;
1994 /* Add the new value to end of value list, which can reduce iterations
1995 of propagation stage for recursive function. */
1996 if (last_val)
1997 last_val->next = val;
1998 else
1999 values = val;
2001 if (val_p)
2002 *val_p = val;
2004 return true;
2007 /* A helper function that returns result of operation specified by OPCODE on
2008 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2009 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2010 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2011 the result. */
2013 static tree
2014 get_val_across_arith_op (enum tree_code opcode,
2015 tree opnd1_type,
2016 tree opnd2,
2017 ipcp_value<tree> *src_val,
2018 tree res_type)
2020 tree opnd1 = src_val->value;
2022 /* Skip source values that is incompatible with specified type. */
2023 if (opnd1_type
2024 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2025 return NULL_TREE;
2027 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2030 /* Propagate values through an arithmetic transformation described by a jump
2031 function associated with edge CS, taking values from SRC_LAT and putting
2032 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2033 OPND2 is a constant value if transformation is a binary operation.
2034 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2035 a part of the aggregate. SRC_IDX is the index of the source parameter.
2036 RES_TYPE is the value type of result being propagated into. Return true if
2037 DEST_LAT changed. */
2039 static bool
2040 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2041 enum tree_code opcode,
2042 tree opnd1_type,
2043 tree opnd2,
2044 ipcp_lattice<tree> *src_lat,
2045 ipcp_lattice<tree> *dest_lat,
2046 HOST_WIDE_INT src_offset,
2047 int src_idx,
2048 tree res_type)
2050 ipcp_value<tree> *src_val;
2051 bool ret = false;
2053 /* Due to circular dependencies, propagating within an SCC through arithmetic
2054 transformation would create infinite number of values. But for
2055 self-feeding recursive function, we could allow propagation in a limited
2056 count, and this can enable a simple kind of recursive function versioning.
2057 For other scenario, we would just make lattices bottom. */
2058 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2060 int i;
2062 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2063 param_ipa_cp_max_recursive_depth);
2064 if (src_lat != dest_lat || max_recursive_depth < 1)
2065 return dest_lat->set_contains_variable ();
2067 /* No benefit if recursive execution is in low probability. */
2068 if (cs->sreal_frequency () * 100
2069 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2070 param_ipa_cp_min_recursive_probability))
2071 return dest_lat->set_contains_variable ();
2073 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2075 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2077 /* Now we do not use self-recursively generated value as propagation
2078 source, this is absolutely conservative, but could avoid explosion
2079 of lattice's value space, especially when one recursive function
2080 calls another recursive. */
2081 if (src_val->self_recursion_generated_p ())
2083 ipcp_value_source<tree> *s;
2085 /* If the lattice has already been propagated for the call site,
2086 no need to do that again. */
2087 for (s = src_val->sources; s; s = s->next)
2088 if (s->cs == cs)
2089 return dest_lat->set_contains_variable ();
2091 else
2092 val_seeds.safe_push (src_val);
2095 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2097 /* Recursively generate lattice values with a limited count. */
2098 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2100 for (int j = 1; j < max_recursive_depth; j++)
2102 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2103 src_val, res_type);
2104 if (!cstval
2105 || !ipacp_value_safe_for_type (res_type, cstval))
2106 break;
2108 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2109 src_offset, &src_val, j);
2110 gcc_checking_assert (src_val);
2113 ret |= dest_lat->set_contains_variable ();
2115 else
2116 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2118 /* Now we do not use self-recursively generated value as propagation
2119 source, otherwise it is easy to make value space of normal lattice
2120 overflow. */
2121 if (src_val->self_recursion_generated_p ())
2123 ret |= dest_lat->set_contains_variable ();
2124 continue;
2127 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2128 src_val, res_type);
2129 if (cstval
2130 && ipacp_value_safe_for_type (res_type, cstval))
2131 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2132 src_offset);
2133 else
2134 ret |= dest_lat->set_contains_variable ();
2137 return ret;
2140 /* Propagate values through a pass-through jump function JFUNC associated with
2141 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2142 is the index of the source parameter. PARM_TYPE is the type of the
2143 parameter to which the result is passed. */
2145 static bool
2146 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2147 ipcp_lattice<tree> *src_lat,
2148 ipcp_lattice<tree> *dest_lat, int src_idx,
2149 tree parm_type)
2151 return propagate_vals_across_arith_jfunc (cs,
2152 ipa_get_jf_pass_through_operation (jfunc),
2153 NULL_TREE,
2154 ipa_get_jf_pass_through_operand (jfunc),
2155 src_lat, dest_lat, -1, src_idx, parm_type);
2158 /* Propagate values through an ancestor jump function JFUNC associated with
2159 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2160 is the index of the source parameter. */
2162 static bool
2163 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2164 struct ipa_jump_func *jfunc,
2165 ipcp_lattice<tree> *src_lat,
2166 ipcp_lattice<tree> *dest_lat, int src_idx,
2167 tree param_type)
2169 ipcp_value<tree> *src_val;
2170 bool ret = false;
2172 if (ipa_edge_within_scc (cs))
2173 return dest_lat->set_contains_variable ();
2175 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2177 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2179 if (t && ipacp_value_safe_for_type (param_type, t))
2180 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2181 else
2182 ret |= dest_lat->set_contains_variable ();
2185 return ret;
2188 /* Propagate scalar values across jump function JFUNC that is associated with
2189 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2190 parameter to which the result is passed. */
2192 static bool
2193 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2194 struct ipa_jump_func *jfunc,
2195 ipcp_lattice<tree> *dest_lat,
2196 tree param_type)
2198 if (dest_lat->bottom)
2199 return false;
2201 if (jfunc->type == IPA_JF_CONST)
2203 tree val = ipa_get_jf_constant (jfunc);
2204 if (ipacp_value_safe_for_type (param_type, val))
2205 return dest_lat->add_value (val, cs, NULL, 0);
2206 else
2207 return dest_lat->set_contains_variable ();
2209 else if (jfunc->type == IPA_JF_PASS_THROUGH
2210 || jfunc->type == IPA_JF_ANCESTOR)
2212 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2213 ipcp_lattice<tree> *src_lat;
2214 int src_idx;
2215 bool ret;
2217 if (jfunc->type == IPA_JF_PASS_THROUGH)
2218 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2219 else
2220 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2222 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2223 if (src_lat->bottom)
2224 return dest_lat->set_contains_variable ();
2226 /* If we would need to clone the caller and cannot, do not propagate. */
2227 if (!ipcp_versionable_function_p (cs->caller)
2228 && (src_lat->contains_variable
2229 || (src_lat->values_count > 1)))
2230 return dest_lat->set_contains_variable ();
2232 if (jfunc->type == IPA_JF_PASS_THROUGH)
2233 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2234 dest_lat, src_idx,
2235 param_type);
2236 else
2237 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2238 src_idx, param_type);
2240 if (src_lat->contains_variable)
2241 ret |= dest_lat->set_contains_variable ();
2243 return ret;
2246 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2247 use it for indirect inlining), we should propagate them too. */
2248 return dest_lat->set_contains_variable ();
2251 /* Propagate scalar values across jump function JFUNC that is associated with
2252 edge CS and describes argument IDX and put the values into DEST_LAT. */
2254 static bool
2255 propagate_context_across_jump_function (cgraph_edge *cs,
2256 ipa_jump_func *jfunc, int idx,
2257 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2259 if (dest_lat->bottom)
2260 return false;
2261 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2262 bool ret = false;
2263 bool added_sth = false;
2264 bool type_preserved = true;
2266 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2267 = ipa_get_ith_polymorhic_call_context (args, idx);
2269 if (edge_ctx_ptr)
2270 edge_ctx = *edge_ctx_ptr;
2272 if (jfunc->type == IPA_JF_PASS_THROUGH
2273 || jfunc->type == IPA_JF_ANCESTOR)
2275 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2276 int src_idx;
2277 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2279 /* TODO: Once we figure out how to propagate speculations, it will
2280 probably be a good idea to switch to speculation if type_preserved is
2281 not set instead of punting. */
2282 if (jfunc->type == IPA_JF_PASS_THROUGH)
2284 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2285 goto prop_fail;
2286 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2287 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2289 else
2291 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2292 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2295 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2296 /* If we would need to clone the caller and cannot, do not propagate. */
2297 if (!ipcp_versionable_function_p (cs->caller)
2298 && (src_lat->contains_variable
2299 || (src_lat->values_count > 1)))
2300 goto prop_fail;
2302 ipcp_value<ipa_polymorphic_call_context> *src_val;
2303 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2305 ipa_polymorphic_call_context cur = src_val->value;
2307 if (!type_preserved)
2308 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2309 if (jfunc->type == IPA_JF_ANCESTOR)
2310 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2311 /* TODO: In cases we know how the context is going to be used,
2312 we can improve the result by passing proper OTR_TYPE. */
2313 cur.combine_with (edge_ctx);
2314 if (!cur.useless_p ())
2316 if (src_lat->contains_variable
2317 && !edge_ctx.equal_to (cur))
2318 ret |= dest_lat->set_contains_variable ();
2319 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2320 added_sth = true;
2325 prop_fail:
2326 if (!added_sth)
2328 if (!edge_ctx.useless_p ())
2329 ret |= dest_lat->add_value (edge_ctx, cs);
2330 else
2331 ret |= dest_lat->set_contains_variable ();
2334 return ret;
2337 /* Propagate bits across jfunc that is associated with
2338 edge cs and update dest_lattice accordingly. */
2340 bool
2341 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2342 ipa_jump_func *jfunc,
2343 ipcp_bits_lattice *dest_lattice)
2345 if (dest_lattice->bottom_p ())
2346 return false;
2348 enum availability availability;
2349 cgraph_node *callee = cs->callee->function_symbol (&availability);
2350 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2351 tree parm_type = ipa_get_type (callee_info, idx);
2353 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2354 transform for these cases. Similarly, we can have bad type mismatches
2355 with LTO, avoid doing anything with those too. */
2356 if (!parm_type
2357 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2361 "param %i of %s is NULL or unsuitable for bits propagation\n",
2362 idx, cs->callee->dump_name ());
2364 return dest_lattice->set_to_bottom ();
2367 unsigned precision = TYPE_PRECISION (parm_type);
2368 signop sgn = TYPE_SIGN (parm_type);
2370 if (jfunc->type == IPA_JF_PASS_THROUGH
2371 || jfunc->type == IPA_JF_ANCESTOR)
2373 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2374 tree operand = NULL_TREE;
2375 enum tree_code code;
2376 unsigned src_idx;
2378 if (jfunc->type == IPA_JF_PASS_THROUGH)
2380 code = ipa_get_jf_pass_through_operation (jfunc);
2381 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2382 if (code != NOP_EXPR)
2383 operand = ipa_get_jf_pass_through_operand (jfunc);
2385 else
2387 code = POINTER_PLUS_EXPR;
2388 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2389 unsigned HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2390 operand = build_int_cstu (size_type_node, offset);
2393 class ipcp_param_lattices *src_lats
2394 = ipa_get_parm_lattices (caller_info, src_idx);
2396 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2397 for eg consider:
2398 int f(int x)
2400 g (x & 0xff);
2402 Assume lattice for x is bottom, however we can still propagate
2403 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2404 and we store it in jump function during analysis stage. */
2406 if (src_lats->bits_lattice.bottom_p ()
2407 && jfunc->bits)
2408 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2409 precision);
2410 else
2411 return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
2412 code, operand);
2415 else if (jfunc->type == IPA_JF_ANCESTOR)
2416 return dest_lattice->set_to_bottom ();
2417 else if (jfunc->bits)
2418 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2419 precision);
2420 else
2421 return dest_lattice->set_to_bottom ();
2424 /* Propagate value range across jump function JFUNC that is associated with
2425 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2426 accordingly. */
2428 static bool
2429 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2430 class ipcp_param_lattices *dest_plats,
2431 tree param_type)
2433 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2435 if (dest_lat->bottom_p ())
2436 return false;
2438 if (!param_type
2439 || (!INTEGRAL_TYPE_P (param_type)
2440 && !POINTER_TYPE_P (param_type)))
2441 return dest_lat->set_to_bottom ();
2443 if (jfunc->type == IPA_JF_PASS_THROUGH)
2445 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2446 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2447 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2448 class ipcp_param_lattices *src_lats
2449 = ipa_get_parm_lattices (caller_info, src_idx);
2450 tree operand_type = ipa_get_type (caller_info, src_idx);
2452 if (src_lats->m_value_range.bottom_p ())
2453 return dest_lat->set_to_bottom ();
2455 value_range vr;
2456 if (TREE_CODE_CLASS (operation) == tcc_unary)
2457 ipa_vr_operation_and_type_effects (&vr,
2458 &src_lats->m_value_range.m_vr,
2459 operation, param_type,
2460 operand_type);
2461 /* A crude way to prevent unbounded number of value range updates
2462 in SCC components. We should allow limited number of updates within
2463 SCC, too. */
2464 else if (!ipa_edge_within_scc (cs))
2466 tree op = ipa_get_jf_pass_through_operand (jfunc);
2467 value_range op_vr (op, op);
2468 value_range op_res,res;
2470 range_fold_binary_expr (&op_res, operation, operand_type,
2471 &src_lats->m_value_range.m_vr, &op_vr);
2472 ipa_vr_operation_and_type_effects (&vr,
2473 &op_res,
2474 NOP_EXPR, param_type,
2475 operand_type);
2477 if (!vr.undefined_p () && !vr.varying_p ())
2479 if (jfunc->m_vr)
2481 value_range jvr;
2482 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2483 NOP_EXPR,
2484 param_type,
2485 jfunc->m_vr->type ()))
2486 vr.intersect (jvr);
2488 return dest_lat->meet_with (&vr);
2491 else if (jfunc->type == IPA_JF_CONST)
2493 tree val = ipa_get_jf_constant (jfunc);
2494 if (TREE_CODE (val) == INTEGER_CST)
2496 val = fold_convert (param_type, val);
2497 if (TREE_OVERFLOW_P (val))
2498 val = drop_tree_overflow (val);
2500 value_range tmpvr (val, val);
2501 return dest_lat->meet_with (&tmpvr);
2505 value_range vr;
2506 if (jfunc->m_vr
2507 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2508 param_type,
2509 jfunc->m_vr->type ()))
2510 return dest_lat->meet_with (&vr);
2511 else
2512 return dest_lat->set_to_bottom ();
2515 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2516 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2517 other cases, return false). If there are no aggregate items, set
2518 aggs_by_ref to NEW_AGGS_BY_REF. */
2520 static bool
2521 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2522 bool new_aggs_by_ref)
2524 if (dest_plats->aggs)
2526 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2528 set_agg_lats_to_bottom (dest_plats);
2529 return true;
2532 else
2533 dest_plats->aggs_by_ref = new_aggs_by_ref;
2534 return false;
2537 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2538 already existing lattice for the given OFFSET and SIZE, marking all skipped
2539 lattices as containing variable and checking for overlaps. If there is no
2540 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2541 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2542 unless there are too many already. If there are two many, return false. If
2543 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2544 skipped lattices were newly marked as containing variable, set *CHANGE to
2545 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2547 static bool
2548 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2549 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2550 struct ipcp_agg_lattice ***aglat,
2551 bool pre_existing, bool *change, int max_agg_items)
2553 gcc_checking_assert (offset >= 0);
2555 while (**aglat && (**aglat)->offset < offset)
2557 if ((**aglat)->offset + (**aglat)->size > offset)
2559 set_agg_lats_to_bottom (dest_plats);
2560 return false;
2562 *change |= (**aglat)->set_contains_variable ();
2563 *aglat = &(**aglat)->next;
2566 if (**aglat && (**aglat)->offset == offset)
2568 if ((**aglat)->size != val_size)
2570 set_agg_lats_to_bottom (dest_plats);
2571 return false;
2573 gcc_assert (!(**aglat)->next
2574 || (**aglat)->next->offset >= offset + val_size);
2575 return true;
2577 else
2579 struct ipcp_agg_lattice *new_al;
2581 if (**aglat && (**aglat)->offset < offset + val_size)
2583 set_agg_lats_to_bottom (dest_plats);
2584 return false;
2586 if (dest_plats->aggs_count == max_agg_items)
2587 return false;
2588 dest_plats->aggs_count++;
2589 new_al = ipcp_agg_lattice_pool.allocate ();
2590 memset (new_al, 0, sizeof (*new_al));
2592 new_al->offset = offset;
2593 new_al->size = val_size;
2594 new_al->contains_variable = pre_existing;
2596 new_al->next = **aglat;
2597 **aglat = new_al;
2598 return true;
2602 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2603 containing an unknown value. */
2605 static bool
2606 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2608 bool ret = false;
2609 while (aglat)
2611 ret |= aglat->set_contains_variable ();
2612 aglat = aglat->next;
2614 return ret;
2617 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2618 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2619 parameter used for lattice value sources. Return true if DEST_PLATS changed
2620 in any way. */
2622 static bool
2623 merge_aggregate_lattices (struct cgraph_edge *cs,
2624 class ipcp_param_lattices *dest_plats,
2625 class ipcp_param_lattices *src_plats,
2626 int src_idx, HOST_WIDE_INT offset_delta)
2628 bool pre_existing = dest_plats->aggs != NULL;
2629 struct ipcp_agg_lattice **dst_aglat;
2630 bool ret = false;
2632 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2633 return true;
2634 if (src_plats->aggs_bottom)
2635 return set_agg_lats_contain_variable (dest_plats);
2636 if (src_plats->aggs_contain_variable)
2637 ret |= set_agg_lats_contain_variable (dest_plats);
2638 dst_aglat = &dest_plats->aggs;
2640 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2641 param_ipa_max_agg_items);
2642 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2643 src_aglat;
2644 src_aglat = src_aglat->next)
2646 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2648 if (new_offset < 0)
2649 continue;
2650 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2651 &dst_aglat, pre_existing, &ret, max_agg_items))
2653 struct ipcp_agg_lattice *new_al = *dst_aglat;
2655 dst_aglat = &(*dst_aglat)->next;
2656 if (src_aglat->bottom)
2658 ret |= new_al->set_contains_variable ();
2659 continue;
2661 if (src_aglat->contains_variable)
2662 ret |= new_al->set_contains_variable ();
2663 for (ipcp_value<tree> *val = src_aglat->values;
2664 val;
2665 val = val->next)
2666 ret |= new_al->add_value (val->value, cs, val, src_idx,
2667 src_aglat->offset);
2669 else if (dest_plats->aggs_bottom)
2670 return true;
2672 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2673 return ret;
2676 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2677 pass-through JFUNC and if so, whether it has conform and conforms to the
2678 rules about propagating values passed by reference. */
2680 static bool
2681 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2682 struct ipa_jump_func *jfunc)
2684 return src_plats->aggs
2685 && (!src_plats->aggs_by_ref
2686 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2689 /* Propagate values through ITEM, jump function for a part of an aggregate,
2690 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2691 associated with the jump function. Return true if AGLAT changed in any
2692 way. */
2694 static bool
2695 propagate_aggregate_lattice (struct cgraph_edge *cs,
2696 struct ipa_agg_jf_item *item,
2697 struct ipcp_agg_lattice *aglat)
2699 class ipa_node_params *caller_info;
2700 class ipcp_param_lattices *src_plats;
2701 struct ipcp_lattice<tree> *src_lat;
2702 HOST_WIDE_INT src_offset;
2703 int src_idx;
2704 tree load_type;
2705 bool ret;
2707 if (item->jftype == IPA_JF_CONST)
2709 tree value = item->value.constant;
2711 gcc_checking_assert (is_gimple_ip_invariant (value));
2712 return aglat->add_value (value, cs, NULL, 0);
2715 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2716 || item->jftype == IPA_JF_LOAD_AGG);
2718 caller_info = ipa_node_params_sum->get (cs->caller);
2719 src_idx = item->value.pass_through.formal_id;
2720 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2722 if (item->jftype == IPA_JF_PASS_THROUGH)
2724 load_type = NULL_TREE;
2725 src_lat = &src_plats->itself;
2726 src_offset = -1;
2728 else
2730 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2731 struct ipcp_agg_lattice *src_aglat;
2733 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2734 if (src_aglat->offset >= load_offset)
2735 break;
2737 load_type = item->value.load_agg.type;
2738 if (!src_aglat
2739 || src_aglat->offset > load_offset
2740 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2741 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2742 return aglat->set_contains_variable ();
2744 src_lat = src_aglat;
2745 src_offset = load_offset;
2748 if (src_lat->bottom
2749 || (!ipcp_versionable_function_p (cs->caller)
2750 && !src_lat->is_single_const ()))
2751 return aglat->set_contains_variable ();
2753 ret = propagate_vals_across_arith_jfunc (cs,
2754 item->value.pass_through.operation,
2755 load_type,
2756 item->value.pass_through.operand,
2757 src_lat, aglat,
2758 src_offset,
2759 src_idx,
2760 item->type);
2762 if (src_lat->contains_variable)
2763 ret |= aglat->set_contains_variable ();
2765 return ret;
2768 /* Propagate scalar values across jump function JFUNC that is associated with
2769 edge CS and put the values into DEST_LAT. */
2771 static bool
2772 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2773 struct ipa_jump_func *jfunc,
2774 class ipcp_param_lattices *dest_plats)
2776 bool ret = false;
2778 if (dest_plats->aggs_bottom)
2779 return false;
2781 if (jfunc->type == IPA_JF_PASS_THROUGH
2782 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2784 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2785 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2786 class ipcp_param_lattices *src_plats;
2788 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2789 if (agg_pass_through_permissible_p (src_plats, jfunc))
2791 /* Currently we do not produce clobber aggregate jump
2792 functions, replace with merging when we do. */
2793 gcc_assert (!jfunc->agg.items);
2794 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2795 src_idx, 0);
2796 return ret;
2799 else if (jfunc->type == IPA_JF_ANCESTOR
2800 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2802 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2803 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2804 class ipcp_param_lattices *src_plats;
2806 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2807 if (src_plats->aggs && src_plats->aggs_by_ref)
2809 /* Currently we do not produce clobber aggregate jump
2810 functions, replace with merging when we do. */
2811 gcc_assert (!jfunc->agg.items);
2812 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2813 ipa_get_jf_ancestor_offset (jfunc));
2815 else if (!src_plats->aggs_by_ref)
2816 ret |= set_agg_lats_to_bottom (dest_plats);
2817 else
2818 ret |= set_agg_lats_contain_variable (dest_plats);
2819 return ret;
2822 if (jfunc->agg.items)
2824 bool pre_existing = dest_plats->aggs != NULL;
2825 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2826 struct ipa_agg_jf_item *item;
2827 int i;
2829 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2830 return true;
2832 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2833 param_ipa_max_agg_items);
2834 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2836 HOST_WIDE_INT val_size;
2838 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2839 continue;
2840 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2842 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2843 &aglat, pre_existing, &ret, max_agg_items))
2845 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2846 aglat = &(*aglat)->next;
2848 else if (dest_plats->aggs_bottom)
2849 return true;
2852 ret |= set_chain_of_aglats_contains_variable (*aglat);
2854 else
2855 ret |= set_agg_lats_contain_variable (dest_plats);
2857 return ret;
2860 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2861 non-thunk) destination, the call passes through a thunk. */
2863 static bool
2864 call_passes_through_thunk (cgraph_edge *cs)
2866 cgraph_node *alias_or_thunk = cs->callee;
2867 while (alias_or_thunk->alias)
2868 alias_or_thunk = alias_or_thunk->get_alias_target ();
2869 return alias_or_thunk->thunk;
2872 /* Propagate constants from the caller to the callee of CS. INFO describes the
2873 caller. */
2875 static bool
2876 propagate_constants_across_call (struct cgraph_edge *cs)
2878 class ipa_node_params *callee_info;
2879 enum availability availability;
2880 cgraph_node *callee;
2881 class ipa_edge_args *args;
2882 bool ret = false;
2883 int i, args_count, parms_count;
2885 callee = cs->callee->function_symbol (&availability);
2886 if (!callee->definition)
2887 return false;
2888 gcc_checking_assert (callee->has_gimple_body_p ());
2889 callee_info = ipa_node_params_sum->get (callee);
2890 if (!callee_info)
2891 return false;
2893 args = ipa_edge_args_sum->get (cs);
2894 parms_count = ipa_get_param_count (callee_info);
2895 if (parms_count == 0)
2896 return false;
2897 if (!args
2898 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2899 || !opt_for_fn (cs->caller->decl, optimize))
2901 for (i = 0; i < parms_count; i++)
2902 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2903 i));
2904 return ret;
2906 args_count = ipa_get_cs_argument_count (args);
2908 /* If this call goes through a thunk we must not propagate to the first (0th)
2909 parameter. However, we might need to uncover a thunk from below a series
2910 of aliases first. */
2911 if (call_passes_through_thunk (cs))
2913 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2914 0));
2915 i = 1;
2917 else
2918 i = 0;
2920 for (; (i < args_count) && (i < parms_count); i++)
2922 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2923 class ipcp_param_lattices *dest_plats;
2924 tree param_type = ipa_get_type (callee_info, i);
2926 dest_plats = ipa_get_parm_lattices (callee_info, i);
2927 if (availability == AVAIL_INTERPOSABLE)
2928 ret |= set_all_contains_variable (dest_plats);
2929 else
2931 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2932 &dest_plats->itself,
2933 param_type);
2934 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2935 &dest_plats->ctxlat);
2937 |= propagate_bits_across_jump_function (cs, i, jump_func,
2938 &dest_plats->bits_lattice);
2939 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2940 dest_plats);
2941 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2942 ret |= propagate_vr_across_jump_function (cs, jump_func,
2943 dest_plats, param_type);
2944 else
2945 ret |= dest_plats->m_value_range.set_to_bottom ();
2948 for (; i < parms_count; i++)
2949 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2951 return ret;
2954 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2955 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter
2956 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2958 static tree
2959 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2960 const vec<tree> &known_csts,
2961 const vec<ipa_polymorphic_call_context> &known_contexts,
2962 const vec<ipa_agg_value_set> &known_aggs,
2963 struct ipa_agg_replacement_value *agg_reps,
2964 bool *speculative)
2966 int param_index = ie->indirect_info->param_index;
2967 HOST_WIDE_INT anc_offset;
2968 tree t = NULL;
2969 tree target = NULL;
2971 *speculative = false;
2973 if (param_index == -1)
2974 return NULL_TREE;
2976 if (!ie->indirect_info->polymorphic)
2978 tree t = NULL;
2980 if (ie->indirect_info->agg_contents)
2982 t = NULL;
2983 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
2985 while (agg_reps)
2987 if (agg_reps->index == param_index
2988 && agg_reps->offset == ie->indirect_info->offset
2989 && agg_reps->by_ref == ie->indirect_info->by_ref)
2991 t = agg_reps->value;
2992 break;
2994 agg_reps = agg_reps->next;
2997 if (!t)
2999 const ipa_agg_value_set *agg;
3000 if (known_aggs.length () > (unsigned int) param_index)
3001 agg = &known_aggs[param_index];
3002 else
3003 agg = NULL;
3004 bool from_global_constant;
3005 t = ipa_find_agg_cst_for_param (agg,
3006 (unsigned) param_index
3007 < known_csts.length ()
3008 ? known_csts[param_index]
3009 : NULL,
3010 ie->indirect_info->offset,
3011 ie->indirect_info->by_ref,
3012 &from_global_constant);
3013 if (t
3014 && !from_global_constant
3015 && !ie->indirect_info->guaranteed_unmodified)
3016 t = NULL_TREE;
3019 else if ((unsigned) param_index < known_csts.length ())
3020 t = known_csts[param_index];
3022 if (t
3023 && TREE_CODE (t) == ADDR_EXPR
3024 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3025 return TREE_OPERAND (t, 0);
3026 else
3027 return NULL_TREE;
3030 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3031 return NULL_TREE;
3033 gcc_assert (!ie->indirect_info->agg_contents);
3034 anc_offset = ie->indirect_info->offset;
3036 t = NULL;
3038 /* Try to work out value of virtual table pointer value in replacements. */
3039 if (!t && agg_reps && !ie->indirect_info->by_ref)
3041 while (agg_reps)
3043 if (agg_reps->index == param_index
3044 && agg_reps->offset == ie->indirect_info->offset
3045 && agg_reps->by_ref)
3047 t = agg_reps->value;
3048 break;
3050 agg_reps = agg_reps->next;
3054 /* Try to work out value of virtual table pointer value in known
3055 aggregate values. */
3056 if (!t && known_aggs.length () > (unsigned int) param_index
3057 && !ie->indirect_info->by_ref)
3059 const ipa_agg_value_set *agg = &known_aggs[param_index];
3060 t = ipa_find_agg_cst_for_param (agg,
3061 (unsigned) param_index
3062 < known_csts.length ()
3063 ? known_csts[param_index] : NULL,
3064 ie->indirect_info->offset, true);
3067 /* If we found the virtual table pointer, lookup the target. */
3068 if (t)
3070 tree vtable;
3071 unsigned HOST_WIDE_INT offset;
3072 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3074 bool can_refer;
3075 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3076 vtable, offset, &can_refer);
3077 if (can_refer)
3079 if (!target
3080 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3081 || !possible_polymorphic_call_target_p
3082 (ie, cgraph_node::get (target)))
3084 /* Do not speculate builtin_unreachable, it is stupid! */
3085 if (ie->indirect_info->vptr_changed)
3086 return NULL;
3087 target = ipa_impossible_devirt_target (ie, target);
3089 *speculative = ie->indirect_info->vptr_changed;
3090 if (!*speculative)
3091 return target;
3096 /* Do we know the constant value of pointer? */
3097 if (!t && (unsigned) param_index < known_csts.length ())
3098 t = known_csts[param_index];
3100 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3102 ipa_polymorphic_call_context context;
3103 if (known_contexts.length () > (unsigned int) param_index)
3105 context = known_contexts[param_index];
3106 context.offset_by (anc_offset);
3107 if (ie->indirect_info->vptr_changed)
3108 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3109 ie->indirect_info->otr_type);
3110 if (t)
3112 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3113 (t, ie->indirect_info->otr_type, anc_offset);
3114 if (!ctx2.useless_p ())
3115 context.combine_with (ctx2, ie->indirect_info->otr_type);
3118 else if (t)
3120 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3121 anc_offset);
3122 if (ie->indirect_info->vptr_changed)
3123 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3124 ie->indirect_info->otr_type);
3126 else
3127 return NULL_TREE;
3129 vec <cgraph_node *>targets;
3130 bool final;
3132 targets = possible_polymorphic_call_targets
3133 (ie->indirect_info->otr_type,
3134 ie->indirect_info->otr_token,
3135 context, &final);
3136 if (!final || targets.length () > 1)
3138 struct cgraph_node *node;
3139 if (*speculative)
3140 return target;
3141 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3142 || ie->speculative || !ie->maybe_hot_p ())
3143 return NULL;
3144 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3145 ie->indirect_info->otr_token,
3146 context);
3147 if (node)
3149 *speculative = true;
3150 target = node->decl;
3152 else
3153 return NULL;
3155 else
3157 *speculative = false;
3158 if (targets.length () == 1)
3159 target = targets[0]->decl;
3160 else
3161 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3164 if (target && !possible_polymorphic_call_target_p (ie,
3165 cgraph_node::get (target)))
3167 if (*speculative)
3168 return NULL;
3169 target = ipa_impossible_devirt_target (ie, target);
3172 return target;
3175 /* If an indirect edge IE can be turned into a direct one based on data in
3176 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3177 whether the discovered target is only speculative guess. */
3179 tree
3180 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3181 ipa_call_arg_values *avals,
3182 bool *speculative)
3184 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3185 avals->m_known_contexts,
3186 avals->m_known_aggs,
3187 NULL, speculative);
3190 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3192 tree
3193 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3194 ipa_auto_call_arg_values *avals,
3195 bool *speculative)
3197 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3198 avals->m_known_contexts,
3199 avals->m_known_aggs,
3200 NULL, speculative);
3203 /* Calculate devirtualization time bonus for NODE, assuming we know information
3204 about arguments stored in AVALS. */
3206 static int
3207 devirtualization_time_bonus (struct cgraph_node *node,
3208 ipa_auto_call_arg_values *avals)
3210 struct cgraph_edge *ie;
3211 int res = 0;
3213 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3215 struct cgraph_node *callee;
3216 class ipa_fn_summary *isummary;
3217 enum availability avail;
3218 tree target;
3219 bool speculative;
3221 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3222 if (!target)
3223 continue;
3225 /* Only bare minimum benefit for clearly un-inlineable targets. */
3226 res += 1;
3227 callee = cgraph_node::get (target);
3228 if (!callee || !callee->definition)
3229 continue;
3230 callee = callee->function_symbol (&avail);
3231 if (avail < AVAIL_AVAILABLE)
3232 continue;
3233 isummary = ipa_fn_summaries->get (callee);
3234 if (!isummary || !isummary->inlinable)
3235 continue;
3237 int size = ipa_size_summaries->get (callee)->size;
3238 /* FIXME: The values below need re-considering and perhaps also
3239 integrating into the cost metrics, at lest in some very basic way. */
3240 int max_inline_insns_auto
3241 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3242 if (size <= max_inline_insns_auto / 4)
3243 res += 31 / ((int)speculative + 1);
3244 else if (size <= max_inline_insns_auto / 2)
3245 res += 15 / ((int)speculative + 1);
3246 else if (size <= max_inline_insns_auto
3247 || DECL_DECLARED_INLINE_P (callee->decl))
3248 res += 7 / ((int)speculative + 1);
3251 return res;
3254 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3256 static int
3257 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3259 int result = 0;
3260 ipa_hints hints = estimates.hints;
3261 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3262 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3264 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3266 if (hints & INLINE_HINT_loop_iterations)
3267 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3269 if (hints & INLINE_HINT_loop_stride)
3270 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3272 return result;
3275 /* If there is a reason to penalize the function described by INFO in the
3276 cloning goodness evaluation, do so. */
3278 static inline sreal
3279 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3280 sreal evaluation)
3282 if (info->node_within_scc && !info->node_is_self_scc)
3283 evaluation = (evaluation
3284 * (100 - opt_for_fn (node->decl,
3285 param_ipa_cp_recursion_penalty))) / 100;
3287 if (info->node_calling_single_call)
3288 evaluation = (evaluation
3289 * (100 - opt_for_fn (node->decl,
3290 param_ipa_cp_single_call_penalty)))
3291 / 100;
3293 return evaluation;
3296 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3297 and SIZE_COST and with the sum of frequencies of incoming edges to the
3298 potential new clone in FREQUENCIES. */
3300 static bool
3301 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3302 sreal freq_sum, profile_count count_sum,
3303 int size_cost)
3305 if (time_benefit == 0
3306 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3307 || node->optimize_for_size_p ())
3308 return false;
3310 gcc_assert (size_cost > 0);
3312 ipa_node_params *info = ipa_node_params_sum->get (node);
3313 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3314 if (count_sum > profile_count::zero ())
3316 gcc_assert (base_count > profile_count::zero ());
3317 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3318 sreal evaluation = (time_benefit * factor) / size_cost;
3319 evaluation = incorporate_penalties (node, info, evaluation);
3320 evaluation *= 1000;
3322 if (dump_file && (dump_flags & TDF_DETAILS))
3324 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3325 "size: %i, count_sum: ", time_benefit.to_double (),
3326 size_cost);
3327 count_sum.dump (dump_file);
3328 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3329 info->node_within_scc
3330 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3331 info->node_calling_single_call ? ", single_call" : "",
3332 evaluation.to_double (), eval_threshold);
3335 return evaluation.to_int () >= eval_threshold;
3337 else
3339 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3340 evaluation = incorporate_penalties (node, info, evaluation);
3341 evaluation *= 1000;
3343 if (dump_file && (dump_flags & TDF_DETAILS))
3344 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3345 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3346 "threshold: %i\n",
3347 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3348 info->node_within_scc
3349 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3350 info->node_calling_single_call ? ", single_call" : "",
3351 evaluation.to_double (), eval_threshold);
3353 return evaluation.to_int () >= eval_threshold;
3357 /* Return all context independent values from aggregate lattices in PLATS in a
3358 vector. Return NULL if there are none. */
3360 static vec<ipa_agg_value>
3361 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3363 vec<ipa_agg_value> res = vNULL;
3365 if (plats->aggs_bottom
3366 || plats->aggs_contain_variable
3367 || plats->aggs_count == 0)
3368 return vNULL;
3370 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3371 aglat;
3372 aglat = aglat->next)
3373 if (aglat->is_single_const ())
3375 struct ipa_agg_value item;
3376 item.offset = aglat->offset;
3377 item.value = aglat->values->value;
3378 res.safe_push (item);
3380 return res;
3383 /* Grow vectors in AVALS and fill them with information about values of
3384 parameters that are known to be independent of the context. Only calculate
3385 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3386 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3387 parameters will be stored in it.
3389 TODO: Also grow context independent value range vectors. */
3391 static bool
3392 gather_context_independent_values (class ipa_node_params *info,
3393 ipa_auto_call_arg_values *avals,
3394 bool calculate_aggs,
3395 int *removable_params_cost)
3397 int i, count = ipa_get_param_count (info);
3398 bool ret = false;
3400 avals->m_known_vals.safe_grow_cleared (count, true);
3401 avals->m_known_contexts.safe_grow_cleared (count, true);
3402 if (calculate_aggs)
3403 avals->m_known_aggs.safe_grow_cleared (count, true);
3405 if (removable_params_cost)
3406 *removable_params_cost = 0;
3408 for (i = 0; i < count; i++)
3410 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3411 ipcp_lattice<tree> *lat = &plats->itself;
3413 if (lat->is_single_const ())
3415 ipcp_value<tree> *val = lat->values;
3416 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3417 avals->m_known_vals[i] = val->value;
3418 if (removable_params_cost)
3419 *removable_params_cost
3420 += estimate_move_cost (TREE_TYPE (val->value), false);
3421 ret = true;
3423 else if (removable_params_cost
3424 && !ipa_is_param_used (info, i))
3425 *removable_params_cost
3426 += ipa_get_param_move_cost (info, i);
3428 if (!ipa_is_param_used (info, i))
3429 continue;
3431 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3432 /* Do not account known context as reason for cloning. We can see
3433 if it permits devirtualization. */
3434 if (ctxlat->is_single_const ())
3435 avals->m_known_contexts[i] = ctxlat->values->value;
3437 if (calculate_aggs)
3439 vec<ipa_agg_value> agg_items;
3440 struct ipa_agg_value_set *agg;
3442 agg_items = context_independent_aggregate_values (plats);
3443 agg = &avals->m_known_aggs[i];
3444 agg->items = agg_items;
3445 agg->by_ref = plats->aggs_by_ref;
3446 ret |= !agg_items.is_empty ();
3450 return ret;
3453 /* Perform time and size measurement of NODE with the context given in AVALS,
3454 calculate the benefit compared to the node without specialization and store
3455 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3456 context-independent or unused removable parameters and EST_MOVE_COST, the
3457 estimated movement of the considered parameter. */
3459 static void
3460 perform_estimation_of_a_value (cgraph_node *node,
3461 ipa_auto_call_arg_values *avals,
3462 int removable_params_cost, int est_move_cost,
3463 ipcp_value_base *val)
3465 sreal time_benefit;
3466 ipa_call_estimates estimates;
3468 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3470 /* Extern inline functions have no cloning local time benefits because they
3471 will be inlined anyway. The only reason to clone them is if it enables
3472 optimization in any of the functions they call. */
3473 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3474 time_benefit = 0;
3475 else
3476 time_benefit = (estimates.nonspecialized_time - estimates.time)
3477 + (devirtualization_time_bonus (node, avals)
3478 + hint_time_bonus (node, estimates)
3479 + removable_params_cost + est_move_cost);
3481 int size = estimates.size;
3482 gcc_checking_assert (size >=0);
3483 /* The inliner-heuristics based estimates may think that in certain
3484 contexts some functions do not have any size at all but we want
3485 all specializations to have at least a tiny cost, not least not to
3486 divide by zero. */
3487 if (size == 0)
3488 size = 1;
3490 val->local_time_benefit = time_benefit;
3491 val->local_size_cost = size;
3494 /* Get the overall limit oof growth based on parameters extracted from growth.
3495 it does not really make sense to mix functions with different overall growth
3496 limits but it is possible and if it happens, we do not want to select one
3497 limit at random. */
3499 static long
3500 get_max_overall_size (cgraph_node *node)
3502 long max_new_size = orig_overall_size;
3503 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3504 if (max_new_size < large_unit)
3505 max_new_size = large_unit;
3506 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3507 max_new_size += max_new_size * unit_growth / 100 + 1;
3508 return max_new_size;
3511 /* Iterate over known values of parameters of NODE and estimate the local
3512 effects in terms of time and size they have. */
3514 static void
3515 estimate_local_effects (struct cgraph_node *node)
3517 ipa_node_params *info = ipa_node_params_sum->get (node);
3518 int i, count = ipa_get_param_count (info);
3519 bool always_const;
3520 int removable_params_cost;
3522 if (!count || !ipcp_versionable_function_p (node))
3523 return;
3525 if (dump_file && (dump_flags & TDF_DETAILS))
3526 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3528 ipa_auto_call_arg_values avals;
3529 always_const = gather_context_independent_values (info, &avals, true,
3530 &removable_params_cost);
3531 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3532 if (always_const || devirt_bonus
3533 || (removable_params_cost && node->can_change_signature))
3535 struct caller_statistics stats;
3536 ipa_call_estimates estimates;
3538 init_caller_stats (&stats);
3539 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3540 false);
3541 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3542 sreal time = estimates.nonspecialized_time - estimates.time;
3543 time += devirt_bonus;
3544 time += hint_time_bonus (node, estimates);
3545 time += removable_params_cost;
3546 int size = estimates.size - stats.n_calls * removable_params_cost;
3548 if (dump_file)
3549 fprintf (dump_file, " - context independent values, size: %i, "
3550 "time_benefit: %f\n", size, (time).to_double ());
3552 if (size <= 0 || node->local)
3554 info->do_clone_for_all_contexts = true;
3556 if (dump_file)
3557 fprintf (dump_file, " Decided to specialize for all "
3558 "known contexts, code not going to grow.\n");
3560 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3561 stats.count_sum, size))
3563 if (size + overall_size <= get_max_overall_size (node))
3565 info->do_clone_for_all_contexts = true;
3566 overall_size += size;
3568 if (dump_file)
3569 fprintf (dump_file, " Decided to specialize for all "
3570 "known contexts, growth (to %li) deemed "
3571 "beneficial.\n", overall_size);
3573 else if (dump_file && (dump_flags & TDF_DETAILS))
3574 fprintf (dump_file, " Not cloning for all contexts because "
3575 "maximum unit size would be reached with %li.\n",
3576 size + overall_size);
3578 else if (dump_file && (dump_flags & TDF_DETAILS))
3579 fprintf (dump_file, " Not cloning for all contexts because "
3580 "!good_cloning_opportunity_p.\n");
3584 for (i = 0; i < count; i++)
3586 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3587 ipcp_lattice<tree> *lat = &plats->itself;
3588 ipcp_value<tree> *val;
3590 if (lat->bottom
3591 || !lat->values
3592 || avals.m_known_vals[i])
3593 continue;
3595 for (val = lat->values; val; val = val->next)
3597 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3598 avals.m_known_vals[i] = val->value;
3600 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3601 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3602 emc, val);
3604 if (dump_file && (dump_flags & TDF_DETAILS))
3606 fprintf (dump_file, " - estimates for value ");
3607 print_ipcp_constant_value (dump_file, val->value);
3608 fprintf (dump_file, " for ");
3609 ipa_dump_param (dump_file, info, i);
3610 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3611 val->local_time_benefit.to_double (),
3612 val->local_size_cost);
3615 avals.m_known_vals[i] = NULL_TREE;
3618 for (i = 0; i < count; i++)
3620 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3622 if (!plats->virt_call)
3623 continue;
3625 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3626 ipcp_value<ipa_polymorphic_call_context> *val;
3628 if (ctxlat->bottom
3629 || !ctxlat->values
3630 || !avals.m_known_contexts[i].useless_p ())
3631 continue;
3633 for (val = ctxlat->values; val; val = val->next)
3635 avals.m_known_contexts[i] = val->value;
3636 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3637 0, val);
3639 if (dump_file && (dump_flags & TDF_DETAILS))
3641 fprintf (dump_file, " - estimates for polymorphic context ");
3642 print_ipcp_constant_value (dump_file, val->value);
3643 fprintf (dump_file, " for ");
3644 ipa_dump_param (dump_file, info, i);
3645 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3646 val->local_time_benefit.to_double (),
3647 val->local_size_cost);
3650 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3653 for (i = 0; i < count; i++)
3655 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3657 if (plats->aggs_bottom || !plats->aggs)
3658 continue;
3660 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3661 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3663 ipcp_value<tree> *val;
3664 if (aglat->bottom || !aglat->values
3665 /* If the following is true, the one value is in known_aggs. */
3666 || (!plats->aggs_contain_variable
3667 && aglat->is_single_const ()))
3668 continue;
3670 for (val = aglat->values; val; val = val->next)
3672 struct ipa_agg_value item;
3674 item.offset = aglat->offset;
3675 item.value = val->value;
3676 agg->items.safe_push (item);
3678 perform_estimation_of_a_value (node, &avals,
3679 removable_params_cost, 0, val);
3681 if (dump_file && (dump_flags & TDF_DETAILS))
3683 fprintf (dump_file, " - estimates for value ");
3684 print_ipcp_constant_value (dump_file, val->value);
3685 fprintf (dump_file, " for ");
3686 ipa_dump_param (dump_file, info, i);
3687 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3688 "]: time_benefit: %g, size: %i\n",
3689 plats->aggs_by_ref ? "ref " : "",
3690 aglat->offset,
3691 val->local_time_benefit.to_double (),
3692 val->local_size_cost);
3695 agg->items.pop ();
3702 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3703 topological sort of values. */
3705 template <typename valtype>
3706 void
3707 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3709 ipcp_value_source<valtype> *src;
3711 if (cur_val->dfs)
3712 return;
3714 dfs_counter++;
3715 cur_val->dfs = dfs_counter;
3716 cur_val->low_link = dfs_counter;
3718 cur_val->topo_next = stack;
3719 stack = cur_val;
3720 cur_val->on_stack = true;
3722 for (src = cur_val->sources; src; src = src->next)
3723 if (src->val)
3725 if (src->val->dfs == 0)
3727 add_val (src->val);
3728 if (src->val->low_link < cur_val->low_link)
3729 cur_val->low_link = src->val->low_link;
3731 else if (src->val->on_stack
3732 && src->val->dfs < cur_val->low_link)
3733 cur_val->low_link = src->val->dfs;
3736 if (cur_val->dfs == cur_val->low_link)
3738 ipcp_value<valtype> *v, *scc_list = NULL;
3742 v = stack;
3743 stack = v->topo_next;
3744 v->on_stack = false;
3745 v->scc_no = cur_val->dfs;
3747 v->scc_next = scc_list;
3748 scc_list = v;
3750 while (v != cur_val);
3752 cur_val->topo_next = values_topo;
3753 values_topo = cur_val;
3757 /* Add all values in lattices associated with NODE to the topological sort if
3758 they are not there yet. */
3760 static void
3761 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3763 ipa_node_params *info = ipa_node_params_sum->get (node);
3764 int i, count = ipa_get_param_count (info);
3766 for (i = 0; i < count; i++)
3768 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3769 ipcp_lattice<tree> *lat = &plats->itself;
3770 struct ipcp_agg_lattice *aglat;
3772 if (!lat->bottom)
3774 ipcp_value<tree> *val;
3775 for (val = lat->values; val; val = val->next)
3776 topo->constants.add_val (val);
3779 if (!plats->aggs_bottom)
3780 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3781 if (!aglat->bottom)
3783 ipcp_value<tree> *val;
3784 for (val = aglat->values; val; val = val->next)
3785 topo->constants.add_val (val);
3788 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3789 if (!ctxlat->bottom)
3791 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3792 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3793 topo->contexts.add_val (ctxval);
3798 /* One pass of constants propagation along the call graph edges, from callers
3799 to callees (requires topological ordering in TOPO), iterate over strongly
3800 connected components. */
3802 static void
3803 propagate_constants_topo (class ipa_topo_info *topo)
3805 int i;
3807 for (i = topo->nnodes - 1; i >= 0; i--)
3809 unsigned j;
3810 struct cgraph_node *v, *node = topo->order[i];
3811 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3813 /* First, iteratively propagate within the strongly connected component
3814 until all lattices stabilize. */
3815 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3816 if (v->has_gimple_body_p ())
3818 if (opt_for_fn (v->decl, flag_ipa_cp)
3819 && opt_for_fn (v->decl, optimize))
3820 push_node_to_stack (topo, v);
3821 /* When V is not optimized, we can not push it to stack, but
3822 still we need to set all its callees lattices to bottom. */
3823 else
3825 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3826 propagate_constants_across_call (cs);
3830 v = pop_node_from_stack (topo);
3831 while (v)
3833 struct cgraph_edge *cs;
3834 class ipa_node_params *info = NULL;
3835 bool self_scc = true;
3837 for (cs = v->callees; cs; cs = cs->next_callee)
3838 if (ipa_edge_within_scc (cs))
3840 cgraph_node *callee = cs->callee->function_symbol ();
3842 if (v != callee)
3843 self_scc = false;
3845 if (!info)
3847 info = ipa_node_params_sum->get (v);
3848 info->node_within_scc = true;
3851 if (propagate_constants_across_call (cs))
3852 push_node_to_stack (topo, callee);
3855 if (info)
3856 info->node_is_self_scc = self_scc;
3858 v = pop_node_from_stack (topo);
3861 /* Afterwards, propagate along edges leading out of the SCC, calculates
3862 the local effects of the discovered constants and all valid values to
3863 their topological sort. */
3864 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3865 if (v->has_gimple_body_p ()
3866 && opt_for_fn (v->decl, flag_ipa_cp)
3867 && opt_for_fn (v->decl, optimize))
3869 struct cgraph_edge *cs;
3871 estimate_local_effects (v);
3872 add_all_node_vals_to_toposort (v, topo);
3873 for (cs = v->callees; cs; cs = cs->next_callee)
3874 if (!ipa_edge_within_scc (cs))
3875 propagate_constants_across_call (cs);
3877 cycle_nodes.release ();
3881 /* Propagate the estimated effects of individual values along the topological
3882 from the dependent values to those they depend on. */
3884 template <typename valtype>
3885 void
3886 value_topo_info<valtype>::propagate_effects ()
3888 ipcp_value<valtype> *base;
3889 hash_set<ipcp_value<valtype> *> processed_srcvals;
3891 for (base = values_topo; base; base = base->topo_next)
3893 ipcp_value_source<valtype> *src;
3894 ipcp_value<valtype> *val;
3895 sreal time = 0;
3896 HOST_WIDE_INT size = 0;
3898 for (val = base; val; val = val->scc_next)
3900 time = time + val->local_time_benefit + val->prop_time_benefit;
3901 size = size + val->local_size_cost + val->prop_size_cost;
3904 for (val = base; val; val = val->scc_next)
3906 processed_srcvals.empty ();
3907 for (src = val->sources; src; src = src->next)
3908 if (src->val
3909 && src->cs->maybe_hot_p ())
3911 if (!processed_srcvals.add (src->val))
3913 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3914 if (prop_size < INT_MAX)
3915 src->val->prop_size_cost = prop_size;
3916 else
3917 continue;
3920 int special_factor = 1;
3921 if (val->same_scc (src->val))
3922 special_factor
3923 = opt_for_fn(src->cs->caller->decl,
3924 param_ipa_cp_recursive_freq_factor);
3925 else if (val->self_recursion_generated_p ()
3926 && (src->cs->callee->function_symbol ()
3927 == src->cs->caller))
3929 int max_recur_gen_depth
3930 = opt_for_fn(src->cs->caller->decl,
3931 param_ipa_cp_max_recursive_depth);
3932 special_factor = max_recur_gen_depth
3933 - val->self_recursion_generated_level + 1;
3936 src->val->prop_time_benefit
3937 += time * special_factor * src->cs->sreal_frequency ();
3940 if (size < INT_MAX)
3942 val->prop_time_benefit = time;
3943 val->prop_size_cost = size;
3945 else
3947 val->prop_time_benefit = 0;
3948 val->prop_size_cost = 0;
3954 /* Callback for qsort to sort counts of all edges. */
3956 static int
3957 compare_edge_profile_counts (const void *a, const void *b)
3959 const profile_count *cnt1 = (const profile_count *) a;
3960 const profile_count *cnt2 = (const profile_count *) b;
3962 if (*cnt1 < *cnt2)
3963 return 1;
3964 if (*cnt1 > *cnt2)
3965 return -1;
3966 return 0;
3970 /* Propagate constants, polymorphic contexts and their effects from the
3971 summaries interprocedurally. */
3973 static void
3974 ipcp_propagate_stage (class ipa_topo_info *topo)
3976 struct cgraph_node *node;
3978 if (dump_file)
3979 fprintf (dump_file, "\n Propagating constants:\n\n");
3981 base_count = profile_count::uninitialized ();
3983 bool compute_count_base = false;
3984 unsigned base_count_pos_percent = 0;
3985 FOR_EACH_DEFINED_FUNCTION (node)
3987 if (node->has_gimple_body_p ()
3988 && opt_for_fn (node->decl, flag_ipa_cp)
3989 && opt_for_fn (node->decl, optimize))
3991 ipa_node_params *info = ipa_node_params_sum->get (node);
3992 determine_versionability (node, info);
3994 unsigned nlattices = ipa_get_param_count (info);
3995 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
3996 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
3997 initialize_node_lattices (node);
3999 ipa_size_summary *s = ipa_size_summaries->get (node);
4000 if (node->definition && !node->alias && s != NULL)
4001 overall_size += s->self_size;
4002 if (node->count.ipa ().initialized_p ())
4004 compute_count_base = true;
4005 unsigned pos_percent = opt_for_fn (node->decl,
4006 param_ipa_cp_profile_count_base);
4007 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4011 if (compute_count_base)
4013 auto_vec<profile_count> all_edge_counts;
4014 all_edge_counts.reserve_exact (symtab->edges_count);
4015 FOR_EACH_DEFINED_FUNCTION (node)
4016 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4018 profile_count count = cs->count.ipa ();
4019 if (!(count > profile_count::zero ()))
4020 continue;
4022 enum availability avail;
4023 cgraph_node *tgt
4024 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4025 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4026 if (info && info->versionable)
4027 all_edge_counts.quick_push (count);
4030 if (!all_edge_counts.is_empty ())
4032 gcc_assert (base_count_pos_percent <= 100);
4033 all_edge_counts.qsort (compare_edge_profile_counts);
4035 unsigned base_count_pos
4036 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4037 base_count = all_edge_counts[base_count_pos];
4039 if (dump_file)
4041 fprintf (dump_file, "\nSelected base_count from %u edges at "
4042 "position %u, arriving at: ", all_edge_counts.length (),
4043 base_count_pos);
4044 base_count.dump (dump_file);
4045 fprintf (dump_file, "\n");
4048 else if (dump_file)
4049 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4050 "continuing as if without profile feedback.\n");
4053 orig_overall_size = overall_size;
4055 if (dump_file)
4056 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4058 propagate_constants_topo (topo);
4059 if (flag_checking)
4060 ipcp_verify_propagated_values ();
4061 topo->constants.propagate_effects ();
4062 topo->contexts.propagate_effects ();
4064 if (dump_file)
4066 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4067 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4071 /* Discover newly direct outgoing edges from NODE which is a new clone with
4072 known KNOWN_CSTS and make them direct. */
4074 static void
4075 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4076 vec<tree> known_csts,
4077 vec<ipa_polymorphic_call_context>
4078 known_contexts,
4079 struct ipa_agg_replacement_value *aggvals)
4081 struct cgraph_edge *ie, *next_ie;
4082 bool found = false;
4084 for (ie = node->indirect_calls; ie; ie = next_ie)
4086 tree target;
4087 bool speculative;
4089 next_ie = ie->next_callee;
4090 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4091 vNULL, aggvals, &speculative);
4092 if (target)
4094 bool agg_contents = ie->indirect_info->agg_contents;
4095 bool polymorphic = ie->indirect_info->polymorphic;
4096 int param_index = ie->indirect_info->param_index;
4097 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4098 speculative);
4099 found = true;
4101 if (cs && !agg_contents && !polymorphic)
4103 ipa_node_params *info = ipa_node_params_sum->get (node);
4104 int c = ipa_get_controlled_uses (info, param_index);
4105 if (c != IPA_UNDESCRIBED_USE
4106 && !ipa_get_param_load_dereferenced (info, param_index))
4108 struct ipa_ref *to_del;
4110 c--;
4111 ipa_set_controlled_uses (info, param_index, c);
4112 if (dump_file && (dump_flags & TDF_DETAILS))
4113 fprintf (dump_file, " controlled uses count of param "
4114 "%i bumped down to %i\n", param_index, c);
4115 if (c == 0
4116 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4119 fprintf (dump_file, " and even removing its "
4120 "cloning-created reference\n");
4121 to_del->remove_reference ();
4127 /* Turning calls to direct calls will improve overall summary. */
4128 if (found)
4129 ipa_update_overall_fn_summary (node);
4132 class edge_clone_summary;
4133 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4135 /* Edge clone summary. */
4137 class edge_clone_summary
4139 public:
4140 /* Default constructor. */
4141 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4143 /* Default destructor. */
4144 ~edge_clone_summary ()
4146 if (prev_clone)
4147 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4148 if (next_clone)
4149 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4152 cgraph_edge *prev_clone;
4153 cgraph_edge *next_clone;
4156 class edge_clone_summary_t:
4157 public call_summary <edge_clone_summary *>
4159 public:
4160 edge_clone_summary_t (symbol_table *symtab):
4161 call_summary <edge_clone_summary *> (symtab)
4163 m_initialize_when_cloning = true;
4166 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4167 edge_clone_summary *src_data,
4168 edge_clone_summary *dst_data);
4171 /* Edge duplication hook. */
4173 void
4174 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4175 edge_clone_summary *src_data,
4176 edge_clone_summary *dst_data)
4178 if (src_data->next_clone)
4179 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4180 dst_data->prev_clone = src_edge;
4181 dst_data->next_clone = src_data->next_clone;
4182 src_data->next_clone = dst_edge;
4185 /* Return true is CS calls DEST or its clone for all contexts. When
4186 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4187 edges from/to an all-context clone. */
4189 static bool
4190 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4191 bool allow_recursion_to_clone)
4193 enum availability availability;
4194 cgraph_node *callee = cs->callee->function_symbol (&availability);
4196 if (availability <= AVAIL_INTERPOSABLE)
4197 return false;
4198 if (callee == dest)
4199 return true;
4200 if (!allow_recursion_to_clone && cs->caller == callee)
4201 return false;
4203 ipa_node_params *info = ipa_node_params_sum->get (callee);
4204 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4207 /* Return true if edge CS does bring about the value described by SRC to
4208 DEST_VAL of node DEST or its clone for all contexts. */
4210 static bool
4211 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4212 cgraph_node *dest, ipcp_value<tree> *dest_val)
4214 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4216 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4217 || caller_info->node_dead)
4218 return false;
4220 if (!src->val)
4221 return true;
4223 if (caller_info->ipcp_orig_node)
4225 tree t;
4226 if (src->offset == -1)
4227 t = caller_info->known_csts[src->index];
4228 else
4229 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4230 return (t != NULL_TREE
4231 && values_equal_for_ipcp_p (src->val->value, t));
4233 else
4235 if (src->val == dest_val)
4236 return true;
4238 struct ipcp_agg_lattice *aglat;
4239 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4240 src->index);
4241 if (src->offset == -1)
4242 return (plats->itself.is_single_const ()
4243 && values_equal_for_ipcp_p (src->val->value,
4244 plats->itself.values->value));
4245 else
4247 if (plats->aggs_bottom || plats->aggs_contain_variable)
4248 return false;
4249 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4250 if (aglat->offset == src->offset)
4251 return (aglat->is_single_const ()
4252 && values_equal_for_ipcp_p (src->val->value,
4253 aglat->values->value));
4255 return false;
4259 /* Return true if edge CS does bring about the value described by SRC to
4260 DST_VAL of node DEST or its clone for all contexts. */
4262 static bool
4263 cgraph_edge_brings_value_p (cgraph_edge *cs,
4264 ipcp_value_source<ipa_polymorphic_call_context> *src,
4265 cgraph_node *dest,
4266 ipcp_value<ipa_polymorphic_call_context> *)
4268 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4270 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4271 || caller_info->node_dead)
4272 return false;
4273 if (!src->val)
4274 return true;
4276 if (caller_info->ipcp_orig_node)
4277 return (caller_info->known_contexts.length () > (unsigned) src->index)
4278 && values_equal_for_ipcp_p (src->val->value,
4279 caller_info->known_contexts[src->index]);
4281 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4282 src->index);
4283 return plats->ctxlat.is_single_const ()
4284 && values_equal_for_ipcp_p (src->val->value,
4285 plats->ctxlat.values->value);
4288 /* Get the next clone in the linked list of clones of an edge. */
4290 static inline struct cgraph_edge *
4291 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4293 edge_clone_summary *s = edge_clone_summaries->get (cs);
4294 return s != NULL ? s->next_clone : NULL;
4297 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4298 of them is viable and hot, return true. In that case, for those that still
4299 hold, add their edge frequency and their number and cumulative profile
4300 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4301 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4303 template <typename valtype>
4304 static bool
4305 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4306 sreal *freq_sum, int *caller_count,
4307 profile_count *rec_count_sum,
4308 profile_count *nonrec_count_sum)
4310 ipcp_value_source<valtype> *src;
4311 sreal freq = 0;
4312 int count = 0;
4313 profile_count rec_cnt = profile_count::zero ();
4314 profile_count nonrec_cnt = profile_count::zero ();
4315 bool hot = false;
4316 bool non_self_recursive = false;
4318 for (src = val->sources; src; src = src->next)
4320 struct cgraph_edge *cs = src->cs;
4321 while (cs)
4323 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4325 count++;
4326 freq += cs->sreal_frequency ();
4327 hot |= cs->maybe_hot_p ();
4328 if (cs->caller != dest)
4330 non_self_recursive = true;
4331 if (cs->count.ipa ().initialized_p ())
4332 rec_cnt += cs->count.ipa ();
4334 else if (cs->count.ipa ().initialized_p ())
4335 nonrec_cnt += cs->count.ipa ();
4337 cs = get_next_cgraph_edge_clone (cs);
4341 /* If the only edges bringing a value are self-recursive ones, do not bother
4342 evaluating it. */
4343 if (!non_self_recursive)
4344 return false;
4346 *freq_sum = freq;
4347 *caller_count = count;
4348 *rec_count_sum = rec_cnt;
4349 *nonrec_count_sum = nonrec_cnt;
4351 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4353 struct cgraph_edge *cs;
4355 /* Cold non-SCC source edge could trigger hot recursive execution of
4356 function. Consider the case as hot and rely on following cost model
4357 computation to further select right one. */
4358 for (cs = dest->callers; cs; cs = cs->next_caller)
4359 if (cs->caller == dest && cs->maybe_hot_p ())
4360 return true;
4363 return hot;
4366 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4367 to let a non-self-recursive caller be the first element. Thus, we can
4368 simplify intersecting operations on values that arrive from all of these
4369 callers, especially when there exists self-recursive call. Return true if
4370 this kind of adjustment is possible. */
4372 static bool
4373 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4374 cgraph_node *node)
4376 for (unsigned i = 0; i < callers.length (); i++)
4378 cgraph_edge *cs = callers[i];
4380 if (cs->caller != node)
4382 if (i > 0)
4384 callers[i] = callers[0];
4385 callers[0] = cs;
4387 return true;
4390 return false;
4393 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4394 is assumed their number is known and equal to CALLER_COUNT. */
4396 template <typename valtype>
4397 static vec<cgraph_edge *>
4398 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4399 int caller_count)
4401 ipcp_value_source<valtype> *src;
4402 vec<cgraph_edge *> ret;
4404 ret.create (caller_count);
4405 for (src = val->sources; src; src = src->next)
4407 struct cgraph_edge *cs = src->cs;
4408 while (cs)
4410 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4411 ret.quick_push (cs);
4412 cs = get_next_cgraph_edge_clone (cs);
4416 if (caller_count > 1)
4417 adjust_callers_for_value_intersection (ret, dest);
4419 return ret;
4422 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4423 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4424 should be set to true when the reference created for the constant should be
4425 a load one and not an address one because the corresponding parameter p is
4426 only used as *p. */
4428 static struct ipa_replace_map *
4429 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4430 bool force_load_ref)
4432 struct ipa_replace_map *replace_map;
4434 replace_map = ggc_alloc<ipa_replace_map> ();
4435 if (dump_file)
4437 fprintf (dump_file, " replacing ");
4438 ipa_dump_param (dump_file, info, parm_num);
4440 fprintf (dump_file, " with const ");
4441 print_generic_expr (dump_file, value);
4443 if (force_load_ref)
4444 fprintf (dump_file, " - forcing load reference\n");
4445 else
4446 fprintf (dump_file, "\n");
4448 replace_map->parm_num = parm_num;
4449 replace_map->new_tree = value;
4450 replace_map->force_load_ref = force_load_ref;
4451 return replace_map;
4454 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4455 one, otherwise it will be referred to as the original node. */
4457 static void
4458 dump_profile_updates (cgraph_node *node, bool spec)
4460 if (spec)
4461 fprintf (dump_file, " setting count of the specialized node %s to ",
4462 node->dump_name ());
4463 else
4464 fprintf (dump_file, " setting count of the original node %s to ",
4465 node->dump_name ());
4467 node->count.dump (dump_file);
4468 fprintf (dump_file, "\n");
4469 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4471 fprintf (dump_file, " edge to %s has count ",
4472 cs->callee->dump_name ());
4473 cs->count.dump (dump_file);
4474 fprintf (dump_file, "\n");
4478 /* With partial train run we do not want to assume that original's count is
4479 zero whenever we redurect all executed edges to clone. Simply drop profile
4480 to local one in this case. In eany case, return the new value. ORIG_NODE
4481 is the original node and its count has not been updaed yet. */
4483 profile_count
4484 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4486 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4487 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4488 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4489 remainder = remainder.guessed_local ();
4491 return remainder;
4494 /* Structure to sum counts coming from nodes other than the original node and
4495 its clones. */
4497 struct gather_other_count_struct
4499 cgraph_node *orig;
4500 profile_count other_count;
4503 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4504 counts that come from non-self-recursive calls.. */
4506 static bool
4507 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4509 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4510 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4511 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4512 desc->other_count += cs->count.ipa ();
4513 return false;
4516 /* Structure to help analyze if we need to boost counts of some clones of some
4517 non-recursive edges to match the new callee count. */
4519 struct desc_incoming_count_struct
4521 cgraph_node *orig;
4522 hash_set <cgraph_edge *> *processed_edges;
4523 profile_count count;
4524 unsigned unproc_orig_rec_edges;
4527 /* Go over edges calling NODE and its thunks and gather information about
4528 incoming counts so that we know if we need to make any adjustments. */
4530 static void
4531 analyze_clone_icoming_counts (cgraph_node *node,
4532 desc_incoming_count_struct *desc)
4534 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4535 if (cs->caller->thunk)
4537 analyze_clone_icoming_counts (cs->caller, desc);
4538 continue;
4540 else
4542 if (cs->count.initialized_p ())
4543 desc->count += cs->count.ipa ();
4544 if (!desc->processed_edges->contains (cs)
4545 && cs->caller->clone_of == desc->orig)
4546 desc->unproc_orig_rec_edges++;
4550 /* If caller edge counts of a clone created for a self-recursive arithmetic
4551 jump function must be adjusted because it is coming from a the "seed" clone
4552 for the first value and so has been excessively scaled back as if it was not
4553 a recursive call, adjust it so that the incoming counts of NODE match its
4554 count. NODE is the node or its thunk. */
4556 static void
4557 adjust_clone_incoming_counts (cgraph_node *node,
4558 desc_incoming_count_struct *desc)
4560 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4561 if (cs->caller->thunk)
4563 adjust_clone_incoming_counts (cs->caller, desc);
4564 profile_count sum = profile_count::zero ();
4565 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4566 if (e->count.initialized_p ())
4567 sum += e->count.ipa ();
4568 cs->count = cs->count.combine_with_ipa_count (sum);
4570 else if (!desc->processed_edges->contains (cs)
4571 && cs->caller->clone_of == desc->orig)
4573 cs->count += desc->count;
4574 if (dump_file)
4576 fprintf (dump_file, " Adjusted count of an incoming edge of "
4577 "a clone %s -> %s to ", cs->caller->dump_name (),
4578 cs->callee->dump_name ());
4579 cs->count.dump (dump_file);
4580 fprintf (dump_file, "\n");
4585 /* When ORIG_NODE has been cloned for values which have been generated fora
4586 self-recursive call as a result of an arithmetic pass-through
4587 jump-functions, adjust its count together with counts of all such clones in
4588 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4590 The function sums the counts of the original node and all its clones that
4591 cannot be attributed to a specific clone because it comes from a
4592 non-recursive edge. This sum is then evenly divided between the clones and
4593 on top of that each one gets all the counts which can be attributed directly
4594 to it. */
4596 static void
4597 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4598 const vec<cgraph_node *> &self_gen_clones)
4600 profile_count redist_sum = orig_node->count.ipa ();
4601 if (!(redist_sum > profile_count::zero ()))
4602 return;
4604 if (dump_file)
4605 fprintf (dump_file, " Updating profile of self recursive clone "
4606 "series\n");
4608 gather_other_count_struct gocs;
4609 gocs.orig = orig_node;
4610 gocs.other_count = profile_count::zero ();
4612 auto_vec <profile_count, 8> other_edges_count;
4613 for (cgraph_node *n : self_gen_clones)
4615 gocs.other_count = profile_count::zero ();
4616 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4617 &gocs, false);
4618 other_edges_count.safe_push (gocs.other_count);
4619 redist_sum -= gocs.other_count;
4622 hash_set<cgraph_edge *> processed_edges;
4623 unsigned i = 0;
4624 for (cgraph_node *n : self_gen_clones)
4626 profile_count orig_count = n->count;
4627 profile_count new_count
4628 = (redist_sum.apply_scale (1, self_gen_clones.length ())
4629 + other_edges_count[i]);
4630 new_count = lenient_count_portion_handling (new_count, orig_node);
4631 n->count = new_count;
4632 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4633 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4635 cs->count = cs->count.apply_scale (new_count, orig_count);
4636 processed_edges.add (cs);
4638 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4639 cs->count = cs->count.apply_scale (new_count, orig_count);
4641 i++;
4644 /* There are still going to be edges to ORIG_NODE that have one or more
4645 clones coming from another node clone in SELF_GEN_CLONES and which we
4646 scaled by the same amount, which means that the total incoming sum of
4647 counts to ORIG_NODE will be too high, scale such edges back. */
4648 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4650 if (cs->callee->ultimate_alias_target () == orig_node)
4652 unsigned den = 0;
4653 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4654 if (e->callee->ultimate_alias_target () == orig_node
4655 && processed_edges.contains (e))
4656 den++;
4657 if (den > 0)
4658 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4659 if (e->callee->ultimate_alias_target () == orig_node
4660 && processed_edges.contains (e))
4661 e->count = e->count.apply_scale (1, den);
4665 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4666 along self-recursive edges are likely to have fairly low count and so
4667 edges from them to nodes in the self_gen_clones do not correspond to the
4668 artificially distributed count of the nodes, the total sum of incoming
4669 edges to some clones might be too low. Detect this situation and correct
4670 it. */
4671 for (cgraph_node *n : self_gen_clones)
4673 if (!(n->count.ipa () > profile_count::zero ()))
4674 continue;
4676 desc_incoming_count_struct desc;
4677 desc.orig = orig_node;
4678 desc.processed_edges = &processed_edges;
4679 desc.count = profile_count::zero ();
4680 desc.unproc_orig_rec_edges = 0;
4681 analyze_clone_icoming_counts (n, &desc);
4683 if (n->count.differs_from_p (desc.count))
4685 if (n->count > desc.count
4686 && desc.unproc_orig_rec_edges > 0)
4688 desc.count = n->count - desc.count;
4689 desc.count
4690 = desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
4691 adjust_clone_incoming_counts (n, &desc);
4693 else if (dump_file)
4694 fprintf (dump_file,
4695 " Unable to fix up incoming counts for %s.\n",
4696 n->dump_name ());
4700 if (dump_file)
4701 for (cgraph_node *n : self_gen_clones)
4702 dump_profile_updates (n, n != orig_node);
4703 return;
4706 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4707 their profile information to reflect this. This function should not be used
4708 for clones generated for arithmetic pass-through jump functions on a
4709 self-recursive call graph edge, that situation is handled by
4710 update_counts_for_self_gen_clones. */
4712 static void
4713 update_profiling_info (struct cgraph_node *orig_node,
4714 struct cgraph_node *new_node)
4716 struct caller_statistics stats;
4717 profile_count new_sum;
4718 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4720 if (!(orig_node_count > profile_count::zero ()))
4721 return;
4723 if (dump_file)
4725 fprintf (dump_file, " Updating profile from original count: ");
4726 orig_node_count.dump (dump_file);
4727 fprintf (dump_file, "\n");
4730 init_caller_stats (&stats, new_node);
4731 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4732 false);
4733 new_sum = stats.count_sum;
4735 if (new_sum > orig_node_count)
4737 /* TODO: Perhaps this should be gcc_unreachable ()? */
4738 remainder = profile_count::zero ().guessed_local ();
4740 else if (stats.rec_count_sum.nonzero_p ())
4742 int new_nonrec_calls = stats.n_nonrec_calls;
4743 /* There are self-recursive edges which are likely to bring in the
4744 majority of calls but which we must divide in between the original and
4745 new node. */
4746 init_caller_stats (&stats, orig_node);
4747 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4748 &stats, false);
4749 int orig_nonrec_calls = stats.n_nonrec_calls;
4750 profile_count orig_nonrec_call_count = stats.count_sum;
4752 if (orig_node->local)
4754 if (!orig_nonrec_call_count.nonzero_p ())
4756 if (dump_file)
4757 fprintf (dump_file, " The original is local and the only "
4758 "incoming edges from non-dead callers with nonzero "
4759 "counts are self-recursive, assuming it is cold.\n");
4760 /* The NEW_NODE count and counts of all its outgoing edges
4761 are still unmodified copies of ORIG_NODE's. Just clear
4762 the latter and bail out. */
4763 profile_count zero;
4764 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4765 zero = profile_count::zero ().guessed_local ();
4766 else
4767 zero = profile_count::adjusted_zero ();
4768 orig_node->count = zero;
4769 for (cgraph_edge *cs = orig_node->callees;
4771 cs = cs->next_callee)
4772 cs->count = zero;
4773 for (cgraph_edge *cs = orig_node->indirect_calls;
4775 cs = cs->next_callee)
4776 cs->count = zero;
4777 return;
4780 else
4782 /* Let's behave as if there was another caller that accounts for all
4783 the calls that were either indirect or from other compilation
4784 units. */
4785 orig_nonrec_calls++;
4786 profile_count pretend_caller_count
4787 = (orig_node_count - new_sum - orig_nonrec_call_count
4788 - stats.rec_count_sum);
4789 orig_nonrec_call_count += pretend_caller_count;
4792 /* Divide all "unexplained" counts roughly proportionally to sums of
4793 counts of non-recursive calls.
4795 We put rather arbitrary limits on how many counts we claim because the
4796 number of non-self-recursive incoming count is only a rough guideline
4797 and there are cases (such as mcf) where using it blindly just takes
4798 too many. And if lattices are considered in the opposite order we
4799 could also take too few. */
4800 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4802 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4803 profile_count new_part
4804 = MAX(MIN (unexp.apply_scale (new_sum,
4805 new_sum + orig_nonrec_call_count),
4806 unexp.apply_scale (limit_den - 1, limit_den)),
4807 unexp.apply_scale (new_nonrec_calls, limit_den));
4808 if (dump_file)
4810 fprintf (dump_file, " Claiming ");
4811 new_part.dump (dump_file);
4812 fprintf (dump_file, " of unexplained ");
4813 unexp.dump (dump_file);
4814 fprintf (dump_file, " counts because of self-recursive "
4815 "calls\n");
4817 new_sum += new_part;
4818 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4819 orig_node);
4821 else
4822 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4823 orig_node);
4825 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4826 new_node->count = new_sum;
4827 orig_node->count = remainder;
4829 profile_count orig_new_node_count = orig_node_count;
4830 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4831 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4832 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4833 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4834 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4836 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4837 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4838 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4839 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4840 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4842 if (dump_file)
4844 dump_profile_updates (new_node, true);
4845 dump_profile_updates (orig_node, false);
4849 /* Update the respective profile of specialized NEW_NODE and the original
4850 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4851 have been redirected to the specialized version. */
4853 static void
4854 update_specialized_profile (struct cgraph_node *new_node,
4855 struct cgraph_node *orig_node,
4856 profile_count redirected_sum)
4858 struct cgraph_edge *cs;
4859 profile_count new_node_count, orig_node_count = orig_node->count;
4861 if (dump_file)
4863 fprintf (dump_file, " the sum of counts of redirected edges is ");
4864 redirected_sum.dump (dump_file);
4865 fprintf (dump_file, "\n");
4867 if (!(orig_node_count > profile_count::zero ()))
4868 return;
4870 gcc_assert (orig_node_count >= redirected_sum);
4872 new_node_count = new_node->count;
4873 new_node->count += redirected_sum;
4874 orig_node->count -= redirected_sum;
4876 for (cs = new_node->callees; cs; cs = cs->next_callee)
4877 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4879 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4881 profile_count dec = cs->count.apply_scale (redirected_sum,
4882 orig_node_count);
4883 cs->count -= dec;
4886 if (dump_file)
4888 dump_profile_updates (new_node, true);
4889 dump_profile_updates (orig_node, false);
4893 static void adjust_references_in_caller (cgraph_edge *cs,
4894 symtab_node *symbol, int index);
4896 /* Simple structure to pass a symbol and index (with same meaning as parameters
4897 of adjust_references_in_caller) through a void* parameter of a
4898 call_for_symbol_thunks_and_aliases callback. */
4899 struct symbol_and_index_together
4901 symtab_node *symbol;
4902 int index;
4905 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4906 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4907 static bool
4908 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4910 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4911 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4912 if (!cs->caller->thunk)
4913 adjust_references_in_caller (cs, pack->symbol, pack->index);
4914 return false;
4917 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4918 variable which is only dereferenced and which is represented by SYMBOL. See
4919 if we can remove ADDR reference in callers assosiated witht the call. */
4921 static void
4922 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4924 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4925 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4926 if (jfunc->type == IPA_JF_CONST)
4928 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4929 cs->lto_stmt_uid);
4930 if (!to_del)
4931 return;
4932 to_del->remove_reference ();
4933 if (dump_file)
4934 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4935 cs->caller->dump_name (), symbol->dump_name ());
4936 return;
4939 if (jfunc->type != IPA_JF_PASS_THROUGH
4940 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
4941 return;
4943 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4944 cgraph_node *caller = cs->caller;
4945 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4946 /* TODO: This consistency check may be too big and not really
4947 that useful. Consider removing it. */
4948 tree cst;
4949 if (caller_info->ipcp_orig_node)
4950 cst = caller_info->known_csts[fidx];
4951 else
4953 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4954 gcc_assert (lat->is_single_const ());
4955 cst = lat->values->value;
4957 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4958 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4959 == symbol));
4961 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4962 if (cuses == IPA_UNDESCRIBED_USE)
4963 return;
4964 gcc_assert (cuses > 0);
4965 cuses--;
4966 ipa_set_controlled_uses (caller_info, fidx, cuses);
4967 if (cuses)
4968 return;
4970 if (caller_info->ipcp_orig_node)
4972 /* Cloning machinery has created a reference here, we need to either
4973 remove it or change it to a read one. */
4974 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
4975 if (to_del && to_del->use == IPA_REF_ADDR)
4977 to_del->remove_reference ();
4978 if (dump_file)
4979 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4980 cs->caller->dump_name (), symbol->dump_name ());
4981 if (ipa_get_param_load_dereferenced (caller_info, fidx))
4983 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
4984 if (dump_file)
4985 fprintf (dump_file,
4986 " ...and replaced it with LOAD one.\n");
4991 symbol_and_index_together pack;
4992 pack.symbol = symbol;
4993 pack.index = fidx;
4994 if (caller->can_change_signature)
4995 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
4996 &pack, true);
5000 /* Return true if we would like to remove a parameter from NODE when cloning it
5001 with KNOWN_CSTS scalar constants. */
5003 static bool
5004 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5006 auto_vec<bool, 16> surviving;
5007 bool filled_vec = false;
5008 ipa_node_params *info = ipa_node_params_sum->get (node);
5009 int i, count = ipa_get_param_count (info);
5011 for (i = 0; i < count; i++)
5013 if (!known_csts[i] && ipa_is_param_used (info, i))
5014 continue;
5016 if (!filled_vec)
5018 clone_info *info = clone_info::get (node);
5019 if (!info || !info->param_adjustments)
5020 return true;
5021 info->param_adjustments->get_surviving_params (&surviving);
5022 filled_vec = true;
5024 if (surviving.length() < (unsigned) i && surviving[i])
5025 return true;
5027 return false;
5030 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5031 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5032 redirect all edges in CALLERS to it. */
5034 static struct cgraph_node *
5035 create_specialized_node (struct cgraph_node *node,
5036 vec<tree> known_csts,
5037 vec<ipa_polymorphic_call_context> known_contexts,
5038 struct ipa_agg_replacement_value *aggvals,
5039 vec<cgraph_edge *> &callers)
5041 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5042 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5043 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5044 struct ipa_agg_replacement_value *av;
5045 struct cgraph_node *new_node;
5046 int i, count = ipa_get_param_count (info);
5047 clone_info *cinfo = clone_info::get (node);
5048 ipa_param_adjustments *old_adjustments = cinfo
5049 ? cinfo->param_adjustments : NULL;
5050 ipa_param_adjustments *new_adjustments;
5051 gcc_assert (!info->ipcp_orig_node);
5052 gcc_assert (node->can_change_signature
5053 || !old_adjustments);
5055 if (old_adjustments)
5057 /* At the moment all IPA optimizations should use the number of
5058 parameters of the prevailing decl as the m_always_copy_start.
5059 Handling any other value would complicate the code below, so for the
5060 time bing let's only assert it is so. */
5061 gcc_assert (old_adjustments->m_always_copy_start == count
5062 || old_adjustments->m_always_copy_start < 0);
5063 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5064 for (i = 0; i < old_adj_count; i++)
5066 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5067 if (!node->can_change_signature
5068 || old_adj->op != IPA_PARAM_OP_COPY
5069 || (!known_csts[old_adj->base_index]
5070 && ipa_is_param_used (info, old_adj->base_index)))
5072 ipa_adjusted_param new_adj = *old_adj;
5074 new_adj.prev_clone_adjustment = true;
5075 new_adj.prev_clone_index = i;
5076 vec_safe_push (new_params, new_adj);
5079 bool skip_return = old_adjustments->m_skip_return;
5080 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5081 ipa_param_adjustments (new_params, count,
5082 skip_return));
5084 else if (node->can_change_signature
5085 && want_remove_some_param_p (node, known_csts))
5087 ipa_adjusted_param adj;
5088 memset (&adj, 0, sizeof (adj));
5089 adj.op = IPA_PARAM_OP_COPY;
5090 for (i = 0; i < count; i++)
5091 if (!known_csts[i] && ipa_is_param_used (info, i))
5093 adj.base_index = i;
5094 adj.prev_clone_index = i;
5095 vec_safe_push (new_params, adj);
5097 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5098 ipa_param_adjustments (new_params, count, false));
5100 else
5101 new_adjustments = NULL;
5103 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5104 for (i = 0; i < count; i++)
5106 tree t = known_csts[i];
5107 if (!t)
5108 continue;
5110 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5112 bool load_ref = false;
5113 symtab_node *ref_symbol;
5114 if (TREE_CODE (t) == ADDR_EXPR)
5116 tree base = get_base_address (TREE_OPERAND (t, 0));
5117 if (TREE_CODE (base) == VAR_DECL
5118 && ipa_get_controlled_uses (info, i) == 0
5119 && ipa_get_param_load_dereferenced (info, i)
5120 && (ref_symbol = symtab_node::get (base)))
5122 load_ref = true;
5123 if (node->can_change_signature)
5124 for (cgraph_edge *caller : callers)
5125 adjust_references_in_caller (caller, ref_symbol, i);
5129 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5130 if (replace_map)
5131 vec_safe_push (replace_trees, replace_map);
5133 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5134 for (i = callers.length () - 1; i >= 0; i--)
5136 cgraph_edge *cs = callers[i];
5137 if (cs->caller == node)
5139 self_recursive_calls.safe_push (cs);
5140 callers.unordered_remove (i);
5144 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5145 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5146 node->decl)));
5147 new_node = node->create_virtual_clone (callers, replace_trees,
5148 new_adjustments, "constprop",
5149 suffix_counter);
5150 suffix_counter++;
5152 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5153 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5155 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5156 /* Cloned edges can disappear during cloning as speculation can be
5157 resolved, check that we have one and that it comes from the last
5158 cloning. */
5159 if (cs && cs->caller == new_node)
5160 cs->redirect_callee_duplicating_thunks (new_node);
5161 /* Any future code that would make more than one clone of an outgoing
5162 edge would confuse this mechanism, so let's check that does not
5163 happen. */
5164 gcc_checking_assert (!cs
5165 || !get_next_cgraph_edge_clone (cs)
5166 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5168 if (have_self_recursive_calls)
5169 new_node->expand_all_artificial_thunks ();
5171 ipa_set_node_agg_value_chain (new_node, aggvals);
5172 for (av = aggvals; av; av = av->next)
5173 new_node->maybe_create_reference (av->value, NULL);
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5178 if (known_contexts.exists ())
5180 for (i = 0; i < count; i++)
5181 if (!known_contexts[i].useless_p ())
5183 fprintf (dump_file, " known ctx %i is ", i);
5184 known_contexts[i].dump (dump_file);
5187 if (aggvals)
5188 ipa_dump_agg_replacement_values (dump_file, aggvals);
5191 new_info = ipa_node_params_sum->get (new_node);
5192 new_info->ipcp_orig_node = node;
5193 new_node->ipcp_clone = true;
5194 new_info->known_csts = known_csts;
5195 new_info->known_contexts = known_contexts;
5197 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5199 return new_node;
5202 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5203 pass-through function to itself when the cgraph_node involved is not an
5204 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5205 no-operation pass-through. */
5207 static bool
5208 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5209 bool simple = true)
5211 enum availability availability;
5212 if (cs->caller == cs->callee->function_symbol (&availability)
5213 && availability > AVAIL_INTERPOSABLE
5214 && jfunc->type == IPA_JF_PASS_THROUGH
5215 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5216 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5217 && ipa_node_params_sum->get (cs->caller)
5218 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5219 return true;
5220 return false;
5223 /* Return true if JFUNC, which describes a part of an aggregate represented or
5224 pointed to by the i-th parameter of call CS, is a pass-through function to
5225 itself when the cgraph_node involved is not an IPA-CP clone.. When
5226 SIMPLE is true, further check if JFUNC is a simple no-operation
5227 pass-through. */
5229 static bool
5230 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5231 int i, bool simple = true)
5233 enum availability availability;
5234 if (cs->caller == cs->callee->function_symbol (&availability)
5235 && availability > AVAIL_INTERPOSABLE
5236 && jfunc->jftype == IPA_JF_LOAD_AGG
5237 && jfunc->offset == jfunc->value.load_agg.offset
5238 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5239 && jfunc->value.pass_through.formal_id == i
5240 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5241 && ipa_node_params_sum->get (cs->caller)
5242 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5243 return true;
5244 return false;
5247 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5248 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5250 static void
5251 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5252 vec<tree> &known_csts,
5253 const vec<cgraph_edge *> &callers)
5255 ipa_node_params *info = ipa_node_params_sum->get (node);
5256 int i, count = ipa_get_param_count (info);
5258 for (i = 0; i < count; i++)
5260 struct cgraph_edge *cs;
5261 tree newval = NULL_TREE;
5262 int j;
5263 bool first = true;
5264 tree type = ipa_get_type (info, i);
5266 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5267 continue;
5269 FOR_EACH_VEC_ELT (callers, j, cs)
5271 struct ipa_jump_func *jump_func;
5272 tree t;
5274 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5275 if (!args
5276 || i >= ipa_get_cs_argument_count (args)
5277 || (i == 0
5278 && call_passes_through_thunk (cs)))
5280 newval = NULL_TREE;
5281 break;
5283 jump_func = ipa_get_ith_jump_func (args, i);
5285 /* Besides simple pass-through jump function, arithmetic jump
5286 function could also introduce argument-direct-pass-through for
5287 self-feeding recursive call. For example,
5289 fn (int i)
5291 fn (i & 1);
5294 Given that i is 0, recursive propagation via (i & 1) also gets
5295 0. */
5296 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5298 gcc_assert (newval);
5299 t = ipa_get_jf_arith_result (
5300 ipa_get_jf_pass_through_operation (jump_func),
5301 newval,
5302 ipa_get_jf_pass_through_operand (jump_func),
5303 type);
5305 else
5306 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5307 jump_func, type);
5308 if (!t
5309 || (newval
5310 && !values_equal_for_ipcp_p (t, newval))
5311 || (!first && !newval))
5313 newval = NULL_TREE;
5314 break;
5316 else
5317 newval = t;
5318 first = false;
5321 if (newval)
5323 if (dump_file && (dump_flags & TDF_DETAILS))
5325 fprintf (dump_file, " adding an extra known scalar value ");
5326 print_ipcp_constant_value (dump_file, newval);
5327 fprintf (dump_file, " for ");
5328 ipa_dump_param (dump_file, info, i);
5329 fprintf (dump_file, "\n");
5332 known_csts[i] = newval;
5337 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5338 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5339 CALLERS. */
5341 static void
5342 find_more_contexts_for_caller_subset (cgraph_node *node,
5343 vec<ipa_polymorphic_call_context>
5344 *known_contexts,
5345 const vec<cgraph_edge *> &callers)
5347 ipa_node_params *info = ipa_node_params_sum->get (node);
5348 int i, count = ipa_get_param_count (info);
5350 for (i = 0; i < count; i++)
5352 cgraph_edge *cs;
5354 if (ipa_get_poly_ctx_lat (info, i)->bottom
5355 || (known_contexts->exists ()
5356 && !(*known_contexts)[i].useless_p ()))
5357 continue;
5359 ipa_polymorphic_call_context newval;
5360 bool first = true;
5361 int j;
5363 FOR_EACH_VEC_ELT (callers, j, cs)
5365 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5366 if (!args
5367 || i >= ipa_get_cs_argument_count (args))
5368 return;
5369 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5370 ipa_polymorphic_call_context ctx;
5371 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5372 cs, i, jfunc);
5373 if (first)
5375 newval = ctx;
5376 first = false;
5378 else
5379 newval.meet_with (ctx);
5380 if (newval.useless_p ())
5381 break;
5384 if (!newval.useless_p ())
5386 if (dump_file && (dump_flags & TDF_DETAILS))
5388 fprintf (dump_file, " adding an extra known polymorphic "
5389 "context ");
5390 print_ipcp_constant_value (dump_file, newval);
5391 fprintf (dump_file, " for ");
5392 ipa_dump_param (dump_file, info, i);
5393 fprintf (dump_file, "\n");
5396 if (!known_contexts->exists ())
5397 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5398 true);
5399 (*known_contexts)[i] = newval;
5405 /* Go through PLATS and create a vector of values consisting of values and
5406 offsets (minus OFFSET) of lattices that contain only a single value. */
5408 static vec<ipa_agg_value>
5409 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5411 vec<ipa_agg_value> res = vNULL;
5413 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5414 return vNULL;
5416 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5417 if (aglat->is_single_const ())
5419 struct ipa_agg_value ti;
5420 ti.offset = aglat->offset - offset;
5421 ti.value = aglat->values->value;
5422 res.safe_push (ti);
5424 return res;
5427 /* Intersect all values in INTER with single value lattices in PLATS (while
5428 subtracting OFFSET). */
5430 static void
5431 intersect_with_plats (class ipcp_param_lattices *plats,
5432 vec<ipa_agg_value> *inter,
5433 HOST_WIDE_INT offset)
5435 struct ipcp_agg_lattice *aglat;
5436 struct ipa_agg_value *item;
5437 int k;
5439 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5441 inter->release ();
5442 return;
5445 aglat = plats->aggs;
5446 FOR_EACH_VEC_ELT (*inter, k, item)
5448 bool found = false;
5449 if (!item->value)
5450 continue;
5451 while (aglat)
5453 if (aglat->offset - offset > item->offset)
5454 break;
5455 if (aglat->offset - offset == item->offset)
5457 if (aglat->is_single_const ())
5459 tree value = aglat->values->value;
5461 if (values_equal_for_ipcp_p (item->value, value))
5462 found = true;
5464 break;
5466 aglat = aglat->next;
5468 if (!found)
5469 item->value = NULL_TREE;
5473 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5474 vector result while subtracting OFFSET from the individual value offsets. */
5476 static vec<ipa_agg_value>
5477 agg_replacements_to_vector (struct cgraph_node *node, int index,
5478 HOST_WIDE_INT offset)
5480 struct ipa_agg_replacement_value *av;
5481 vec<ipa_agg_value> res = vNULL;
5483 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5484 if (av->index == index
5485 && (av->offset - offset) >= 0)
5487 struct ipa_agg_value item;
5488 gcc_checking_assert (av->value);
5489 item.offset = av->offset - offset;
5490 item.value = av->value;
5491 res.safe_push (item);
5494 return res;
5497 /* Intersect all values in INTER with those that we have already scheduled to
5498 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5499 (while subtracting OFFSET). */
5501 static void
5502 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5503 vec<ipa_agg_value> *inter,
5504 HOST_WIDE_INT offset)
5506 struct ipa_agg_replacement_value *srcvals;
5507 struct ipa_agg_value *item;
5508 int i;
5510 srcvals = ipa_get_agg_replacements_for_node (node);
5511 if (!srcvals)
5513 inter->release ();
5514 return;
5517 FOR_EACH_VEC_ELT (*inter, i, item)
5519 struct ipa_agg_replacement_value *av;
5520 bool found = false;
5521 if (!item->value)
5522 continue;
5523 for (av = srcvals; av; av = av->next)
5525 gcc_checking_assert (av->value);
5526 if (av->index == index
5527 && av->offset - offset == item->offset)
5529 if (values_equal_for_ipcp_p (item->value, av->value))
5530 found = true;
5531 break;
5534 if (!found)
5535 item->value = NULL_TREE;
5539 /* Intersect values in INTER with aggregate values that come along edge CS to
5540 parameter number INDEX and return it. If INTER does not actually exist yet,
5541 copy all incoming values to it. If we determine we ended up with no values
5542 whatsoever, return a released vector. */
5544 static vec<ipa_agg_value>
5545 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5546 vec<ipa_agg_value> inter)
5548 struct ipa_jump_func *jfunc;
5549 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5550 if (jfunc->type == IPA_JF_PASS_THROUGH
5551 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5553 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5554 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5556 if (caller_info->ipcp_orig_node)
5558 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5559 class ipcp_param_lattices *orig_plats;
5560 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5561 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5562 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5564 if (!inter.exists ())
5565 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5566 else
5567 intersect_with_agg_replacements (cs->caller, src_idx,
5568 &inter, 0);
5569 return inter;
5572 else
5574 class ipcp_param_lattices *src_plats;
5575 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5576 if (agg_pass_through_permissible_p (src_plats, jfunc))
5578 /* Currently we do not produce clobber aggregate jump
5579 functions, adjust when we do. */
5580 gcc_checking_assert (!jfunc->agg.items);
5581 if (!inter.exists ())
5582 inter = copy_plats_to_inter (src_plats, 0);
5583 else
5584 intersect_with_plats (src_plats, &inter, 0);
5585 return inter;
5589 else if (jfunc->type == IPA_JF_ANCESTOR
5590 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5592 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5593 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5594 class ipcp_param_lattices *src_plats;
5595 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5597 if (caller_info->ipcp_orig_node)
5599 if (!inter.exists ())
5600 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5601 else
5602 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5603 delta);
5605 else
5607 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5608 /* Currently we do not produce clobber aggregate jump
5609 functions, adjust when we do. */
5610 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5611 if (!inter.exists ())
5612 inter = copy_plats_to_inter (src_plats, delta);
5613 else
5614 intersect_with_plats (src_plats, &inter, delta);
5616 return inter;
5619 if (jfunc->agg.items)
5621 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5622 struct ipa_agg_value *item;
5623 int k;
5625 if (!inter.exists ())
5626 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5628 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5629 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5630 agg_item);
5631 if (value)
5633 struct ipa_agg_value agg_value;
5635 agg_value.value = value;
5636 agg_value.offset = agg_item->offset;
5637 inter.safe_push (agg_value);
5640 else
5641 FOR_EACH_VEC_ELT (inter, k, item)
5643 int l = 0;
5644 bool found = false;
5646 if (!item->value)
5647 continue;
5649 while ((unsigned) l < jfunc->agg.items->length ())
5651 struct ipa_agg_jf_item *ti;
5652 ti = &(*jfunc->agg.items)[l];
5653 if (ti->offset > item->offset)
5654 break;
5655 if (ti->offset == item->offset)
5657 tree value;
5659 /* Besides simple pass-through aggregate jump function,
5660 arithmetic aggregate jump function could also bring
5661 same aggregate value as parameter passed-in for
5662 self-feeding recursive call. For example,
5664 fn (int *i)
5666 int j = *i & 1;
5667 fn (&j);
5670 Given that *i is 0, recursive propagation via (*i & 1)
5671 also gets 0. */
5672 if (self_recursive_agg_pass_through_p (cs, ti, index,
5673 false))
5674 value = ipa_get_jf_arith_result (
5675 ti->value.pass_through.operation,
5676 item->value,
5677 ti->value.pass_through.operand,
5678 ti->type);
5679 else
5680 value = ipa_agg_value_from_node (caller_info,
5681 cs->caller, ti);
5683 if (value && values_equal_for_ipcp_p (item->value, value))
5684 found = true;
5685 break;
5687 l++;
5689 if (!found)
5690 item->value = NULL;
5693 else
5695 inter.release ();
5696 return vNULL;
5698 return inter;
5701 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5702 from all of them. */
5704 static struct ipa_agg_replacement_value *
5705 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5706 const vec<cgraph_edge *> &callers)
5708 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5709 struct ipa_agg_replacement_value *res;
5710 struct ipa_agg_replacement_value **tail = &res;
5711 struct cgraph_edge *cs;
5712 int i, j, count = ipa_get_param_count (dest_info);
5714 FOR_EACH_VEC_ELT (callers, j, cs)
5716 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5717 if (!args)
5719 count = 0;
5720 break;
5722 int c = ipa_get_cs_argument_count (args);
5723 if (c < count)
5724 count = c;
5727 for (i = 0; i < count; i++)
5729 struct cgraph_edge *cs;
5730 vec<ipa_agg_value> inter = vNULL;
5731 struct ipa_agg_value *item;
5732 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5733 int j;
5735 /* Among other things, the following check should deal with all by_ref
5736 mismatches. */
5737 if (plats->aggs_bottom)
5738 continue;
5740 FOR_EACH_VEC_ELT (callers, j, cs)
5742 struct ipa_jump_func *jfunc
5743 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5744 if (self_recursive_pass_through_p (cs, jfunc, i)
5745 && (!plats->aggs_by_ref
5746 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5747 continue;
5748 inter = intersect_aggregates_with_edge (cs, i, inter);
5750 if (!inter.exists ())
5751 goto next_param;
5754 FOR_EACH_VEC_ELT (inter, j, item)
5756 struct ipa_agg_replacement_value *v;
5758 if (!item->value)
5759 continue;
5761 v = ggc_alloc<ipa_agg_replacement_value> ();
5762 v->index = i;
5763 v->offset = item->offset;
5764 v->value = item->value;
5765 v->by_ref = plats->aggs_by_ref;
5766 *tail = v;
5767 tail = &v->next;
5770 next_param:
5771 if (inter.exists ())
5772 inter.release ();
5774 *tail = NULL;
5775 return res;
5778 /* Determine whether CS also brings all scalar values that the NODE is
5779 specialized for. */
5781 static bool
5782 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5783 struct cgraph_node *node)
5785 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5786 int count = ipa_get_param_count (dest_info);
5787 class ipa_node_params *caller_info;
5788 class ipa_edge_args *args;
5789 int i;
5791 caller_info = ipa_node_params_sum->get (cs->caller);
5792 args = ipa_edge_args_sum->get (cs);
5793 for (i = 0; i < count; i++)
5795 struct ipa_jump_func *jump_func;
5796 tree val, t;
5798 val = dest_info->known_csts[i];
5799 if (!val)
5800 continue;
5802 if (i >= ipa_get_cs_argument_count (args))
5803 return false;
5804 jump_func = ipa_get_ith_jump_func (args, i);
5805 t = ipa_value_from_jfunc (caller_info, jump_func,
5806 ipa_get_type (dest_info, i));
5807 if (!t || !values_equal_for_ipcp_p (val, t))
5808 return false;
5810 return true;
5813 /* Determine whether CS also brings all aggregate values that NODE is
5814 specialized for. */
5815 static bool
5816 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5817 struct cgraph_node *node)
5819 struct ipa_agg_replacement_value *aggval;
5820 int i, ec, count;
5822 aggval = ipa_get_agg_replacements_for_node (node);
5823 if (!aggval)
5824 return true;
5826 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5827 count = ipa_get_param_count (clone_node_info);
5828 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5829 if (ec < count)
5830 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5831 if (aggval->index >= ec)
5832 return false;
5834 ipa_node_params *orig_node_info
5835 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5837 for (i = 0; i < count; i++)
5839 class ipcp_param_lattices *plats;
5840 bool interesting = false;
5841 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5842 if (aggval->index == i)
5844 interesting = true;
5845 break;
5847 if (!interesting)
5848 continue;
5850 plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
5851 if (plats->aggs_bottom)
5852 return false;
5854 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5855 if (!values.exists ())
5856 return false;
5858 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5859 if (aggval->index == i)
5861 struct ipa_agg_value *item;
5862 int j;
5863 bool found = false;
5864 FOR_EACH_VEC_ELT (values, j, item)
5865 if (item->value
5866 && item->offset == av->offset
5867 && values_equal_for_ipcp_p (item->value, av->value))
5869 found = true;
5870 break;
5872 if (!found)
5874 values.release ();
5875 return false;
5878 values.release ();
5880 return true;
5883 /* Given an original NODE and a VAL for which we have already created a
5884 specialized clone, look whether there are incoming edges that still lead
5885 into the old node but now also bring the requested value and also conform to
5886 all other criteria such that they can be redirected the special node.
5887 This function can therefore redirect the final edge in a SCC. */
5889 template <typename valtype>
5890 static void
5891 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5893 ipcp_value_source<valtype> *src;
5894 profile_count redirected_sum = profile_count::zero ();
5896 for (src = val->sources; src; src = src->next)
5898 struct cgraph_edge *cs = src->cs;
5899 while (cs)
5901 if (cgraph_edge_brings_value_p (cs, src, node, val)
5902 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5903 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5905 if (dump_file)
5906 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5907 cs->caller->dump_name (),
5908 val->spec_node->dump_name ());
5910 cs->redirect_callee_duplicating_thunks (val->spec_node);
5911 val->spec_node->expand_all_artificial_thunks ();
5912 if (cs->count.ipa ().initialized_p ())
5913 redirected_sum = redirected_sum + cs->count.ipa ();
5915 cs = get_next_cgraph_edge_clone (cs);
5919 if (redirected_sum.nonzero_p ())
5920 update_specialized_profile (val->spec_node, node, redirected_sum);
5923 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5925 static bool
5926 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5928 ipa_polymorphic_call_context *ctx;
5929 int i;
5931 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5932 if (!ctx->useless_p ())
5933 return true;
5934 return false;
5937 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5939 static vec<ipa_polymorphic_call_context>
5940 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5942 if (known_contexts_useful_p (known_contexts))
5943 return known_contexts.copy ();
5944 else
5945 return vNULL;
5948 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5949 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5950 copy too. */
5952 static void
5953 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5954 vec<tree> *known_csts,
5955 vec<ipa_polymorphic_call_context> *known_contexts,
5956 ipcp_value<tree> *val, int index)
5958 *known_csts = avals->m_known_vals.copy ();
5959 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5960 (*known_csts)[index] = val->value;
5963 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5964 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5965 INDEX. */
5967 static void
5968 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5969 vec<tree> *known_csts,
5970 vec<ipa_polymorphic_call_context> *known_contexts,
5971 ipcp_value<ipa_polymorphic_call_context> *val,
5972 int index)
5974 *known_csts = avals->m_known_vals.copy ();
5975 *known_contexts = avals->m_known_contexts.copy ();
5976 (*known_contexts)[index] = val->value;
5979 /* Return true if OFFSET indicates this was not an aggregate value or there is
5980 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5981 AGGVALS list. */
5983 DEBUG_FUNCTION bool
5984 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
5985 int index, HOST_WIDE_INT offset, tree value)
5987 if (offset == -1)
5988 return true;
5990 while (aggvals)
5992 if (aggvals->index == index
5993 && aggvals->offset == offset
5994 && values_equal_for_ipcp_p (aggvals->value, value))
5995 return true;
5996 aggvals = aggvals->next;
5998 return false;
6001 /* Return true if offset is minus one because source of a polymorphic context
6002 cannot be an aggregate value. */
6004 DEBUG_FUNCTION bool
6005 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6006 int , HOST_WIDE_INT offset,
6007 ipa_polymorphic_call_context)
6009 return offset == -1;
6012 /* Decide whether to create a special version of NODE for value VAL of
6013 parameter at the given INDEX. If OFFSET is -1, the value is for the
6014 parameter itself, otherwise it is stored at the given OFFSET of the
6015 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6016 is a vector which contains clones created for self-recursive calls with an
6017 arithmetic pass-through jump function. */
6019 template <typename valtype>
6020 static bool
6021 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6022 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6023 vec<cgraph_node *> *self_gen_clones)
6025 struct ipa_agg_replacement_value *aggvals;
6026 int caller_count;
6027 sreal freq_sum;
6028 profile_count count_sum, rec_count_sum;
6029 vec<cgraph_edge *> callers;
6031 if (val->spec_node)
6033 perhaps_add_new_callers (node, val);
6034 return false;
6036 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6038 if (dump_file && (dump_flags & TDF_DETAILS))
6039 fprintf (dump_file, " Ignoring candidate value because "
6040 "maximum unit size would be reached with %li.\n",
6041 val->local_size_cost + overall_size);
6042 return false;
6044 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6045 &rec_count_sum, &count_sum))
6046 return false;
6048 if (!dbg_cnt (ipa_cp_values))
6049 return false;
6051 if (val->self_recursion_generated_p ())
6053 /* The edge counts in this case might not have been adjusted yet.
6054 Nevertleless, even if they were it would be only a guesswork which we
6055 can do now. The recursive part of the counts can be derived from the
6056 count of the original node anyway. */
6057 if (node->count.ipa ().nonzero_p ())
6059 unsigned dem = self_gen_clones->length () + 1;
6060 rec_count_sum = node->count.ipa ().apply_scale (1, dem);
6062 else
6063 rec_count_sum = profile_count::zero ();
6066 /* get_info_about_necessary_edges only sums up ipa counts. */
6067 count_sum += rec_count_sum;
6069 if (dump_file && (dump_flags & TDF_DETAILS))
6071 fprintf (dump_file, " - considering value ");
6072 print_ipcp_constant_value (dump_file, val->value);
6073 fprintf (dump_file, " for ");
6074 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6075 if (offset != -1)
6076 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6077 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6080 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6081 freq_sum, count_sum,
6082 val->local_size_cost)
6083 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6084 freq_sum, count_sum, val->prop_size_cost))
6085 return false;
6087 if (dump_file)
6088 fprintf (dump_file, " Creating a specialized node of %s.\n",
6089 node->dump_name ());
6091 vec<tree> known_csts;
6092 vec<ipa_polymorphic_call_context> known_contexts;
6094 callers = gather_edges_for_value (val, node, caller_count);
6095 if (offset == -1)
6096 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6097 else
6099 known_csts = avals->m_known_vals.copy ();
6100 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6102 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6103 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6104 aggvals = find_aggregate_values_for_callers_subset (node, callers);
6105 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6106 offset, val->value));
6107 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6108 aggvals, callers);
6110 if (val->self_recursion_generated_p ())
6111 self_gen_clones->safe_push (val->spec_node);
6112 else
6113 update_profiling_info (node, val->spec_node);
6115 callers.release ();
6116 overall_size += val->local_size_cost;
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6118 fprintf (dump_file, " overall size reached %li\n",
6119 overall_size);
6121 /* TODO: If for some lattice there is only one other known value
6122 left, make a special node for it too. */
6124 return true;
6127 /* Decide whether and what specialized clones of NODE should be created. */
6129 static bool
6130 decide_whether_version_node (struct cgraph_node *node)
6132 ipa_node_params *info = ipa_node_params_sum->get (node);
6133 int i, count = ipa_get_param_count (info);
6134 bool ret = false;
6136 if (count == 0)
6137 return false;
6139 if (dump_file && (dump_flags & TDF_DETAILS))
6140 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6141 node->dump_name ());
6143 auto_vec <cgraph_node *, 9> self_gen_clones;
6144 ipa_auto_call_arg_values avals;
6145 gather_context_independent_values (info, &avals, false, NULL);
6147 for (i = 0; i < count;i++)
6149 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6150 ipcp_lattice<tree> *lat = &plats->itself;
6151 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6153 if (!lat->bottom
6154 && !avals.m_known_vals[i])
6156 ipcp_value<tree> *val;
6157 for (val = lat->values; val; val = val->next)
6158 ret |= decide_about_value (node, i, -1, val, &avals,
6159 &self_gen_clones);
6162 if (!plats->aggs_bottom)
6164 struct ipcp_agg_lattice *aglat;
6165 ipcp_value<tree> *val;
6166 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6167 if (!aglat->bottom && aglat->values
6168 /* If the following is false, the one value has been considered
6169 for cloning for all contexts. */
6170 && (plats->aggs_contain_variable
6171 || !aglat->is_single_const ()))
6172 for (val = aglat->values; val; val = val->next)
6173 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6174 &self_gen_clones);
6177 if (!ctxlat->bottom
6178 && avals.m_known_contexts[i].useless_p ())
6180 ipcp_value<ipa_polymorphic_call_context> *val;
6181 for (val = ctxlat->values; val; val = val->next)
6182 ret |= decide_about_value (node, i, -1, val, &avals,
6183 &self_gen_clones);
6187 if (!self_gen_clones.is_empty ())
6189 self_gen_clones.safe_push (node);
6190 update_counts_for_self_gen_clones (node, self_gen_clones);
6193 if (info->do_clone_for_all_contexts)
6195 if (!dbg_cnt (ipa_cp_values))
6197 info->do_clone_for_all_contexts = false;
6198 return ret;
6201 struct cgraph_node *clone;
6202 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6204 for (int i = callers.length () - 1; i >= 0; i--)
6206 cgraph_edge *cs = callers[i];
6207 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6209 if (caller_info && caller_info->node_dead)
6210 callers.unordered_remove (i);
6213 if (!adjust_callers_for_value_intersection (callers, node))
6215 /* If node is not called by anyone, or all its caller edges are
6216 self-recursive, the node is not really in use, no need to do
6217 cloning. */
6218 info->do_clone_for_all_contexts = false;
6219 return ret;
6222 if (dump_file)
6223 fprintf (dump_file, " - Creating a specialized node of %s "
6224 "for all known contexts.\n", node->dump_name ());
6226 vec<tree> known_csts = avals.m_known_vals.copy ();
6227 vec<ipa_polymorphic_call_context> known_contexts
6228 = copy_useful_known_contexts (avals.m_known_contexts);
6229 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6230 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6231 ipa_agg_replacement_value *aggvals
6232 = find_aggregate_values_for_callers_subset (node, callers);
6234 if (!known_contexts_useful_p (known_contexts))
6236 known_contexts.release ();
6237 known_contexts = vNULL;
6239 clone = create_specialized_node (node, known_csts, known_contexts,
6240 aggvals, callers);
6241 info->do_clone_for_all_contexts = false;
6242 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6243 ret = true;
6246 return ret;
6249 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6251 static void
6252 spread_undeadness (struct cgraph_node *node)
6254 struct cgraph_edge *cs;
6256 for (cs = node->callees; cs; cs = cs->next_callee)
6257 if (ipa_edge_within_scc (cs))
6259 struct cgraph_node *callee;
6260 class ipa_node_params *info;
6262 callee = cs->callee->function_symbol (NULL);
6263 info = ipa_node_params_sum->get (callee);
6265 if (info && info->node_dead)
6267 info->node_dead = 0;
6268 spread_undeadness (callee);
6273 /* Return true if NODE has a caller from outside of its SCC that is not
6274 dead. Worker callback for cgraph_for_node_and_aliases. */
6276 static bool
6277 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6278 void *data ATTRIBUTE_UNUSED)
6280 struct cgraph_edge *cs;
6282 for (cs = node->callers; cs; cs = cs->next_caller)
6283 if (cs->caller->thunk
6284 && cs->caller->call_for_symbol_thunks_and_aliases
6285 (has_undead_caller_from_outside_scc_p, NULL, true))
6286 return true;
6287 else if (!ipa_edge_within_scc (cs))
6289 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6290 if (!caller_info /* Unoptimized caller are like dead ones. */
6291 || !caller_info->node_dead)
6292 return true;
6294 return false;
6298 /* Identify nodes within the same SCC as NODE which are no longer needed
6299 because of new clones and will be removed as unreachable. */
6301 static void
6302 identify_dead_nodes (struct cgraph_node *node)
6304 struct cgraph_node *v;
6305 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6306 if (v->local)
6308 ipa_node_params *info = ipa_node_params_sum->get (v);
6309 if (info
6310 && !v->call_for_symbol_thunks_and_aliases
6311 (has_undead_caller_from_outside_scc_p, NULL, true))
6312 info->node_dead = 1;
6315 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6317 ipa_node_params *info = ipa_node_params_sum->get (v);
6318 if (info && !info->node_dead)
6319 spread_undeadness (v);
6322 if (dump_file && (dump_flags & TDF_DETAILS))
6324 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6325 if (ipa_node_params_sum->get (v)
6326 && ipa_node_params_sum->get (v)->node_dead)
6327 fprintf (dump_file, " Marking node as dead: %s.\n",
6328 v->dump_name ());
6332 /* The decision stage. Iterate over the topological order of call graph nodes
6333 TOPO and make specialized clones if deemed beneficial. */
6335 static void
6336 ipcp_decision_stage (class ipa_topo_info *topo)
6338 int i;
6340 if (dump_file)
6341 fprintf (dump_file, "\nIPA decision stage:\n\n");
6343 for (i = topo->nnodes - 1; i >= 0; i--)
6345 struct cgraph_node *node = topo->order[i];
6346 bool change = false, iterate = true;
6348 while (iterate)
6350 struct cgraph_node *v;
6351 iterate = false;
6352 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6353 if (v->has_gimple_body_p ()
6354 && ipcp_versionable_function_p (v))
6355 iterate |= decide_whether_version_node (v);
6357 change |= iterate;
6359 if (change)
6360 identify_dead_nodes (node);
6364 /* Look up all the bits information that we have discovered and copy it over
6365 to the transformation summary. */
6367 static void
6368 ipcp_store_bits_results (void)
6370 cgraph_node *node;
6372 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6374 ipa_node_params *info = ipa_node_params_sum->get (node);
6375 bool dumped_sth = false;
6376 bool found_useful_result = false;
6378 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6380 if (dump_file)
6381 fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
6382 "; -fipa-bit-cp: disabled.\n",
6383 node->dump_name ());
6384 continue;
6387 if (info->ipcp_orig_node)
6388 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6389 if (!info->lattices)
6390 /* Newly expanded artificial thunks do not have lattices. */
6391 continue;
6393 unsigned count = ipa_get_param_count (info);
6394 for (unsigned i = 0; i < count; i++)
6396 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6397 if (plats->bits_lattice.constant_p ())
6399 found_useful_result = true;
6400 break;
6404 if (!found_useful_result)
6405 continue;
6407 ipcp_transformation_initialize ();
6408 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6409 vec_safe_reserve_exact (ts->bits, count);
6411 for (unsigned i = 0; i < count; i++)
6413 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6414 ipa_bits *jfbits;
6416 if (plats->bits_lattice.constant_p ())
6418 jfbits
6419 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6420 plats->bits_lattice.get_mask ());
6421 if (!dbg_cnt (ipa_cp_bits))
6422 jfbits = NULL;
6424 else
6425 jfbits = NULL;
6427 ts->bits->quick_push (jfbits);
6428 if (!dump_file || !jfbits)
6429 continue;
6430 if (!dumped_sth)
6432 fprintf (dump_file, "Propagated bits info for function %s:\n",
6433 node->dump_name ());
6434 dumped_sth = true;
6436 fprintf (dump_file, " param %i: value = ", i);
6437 print_hex (jfbits->value, dump_file);
6438 fprintf (dump_file, ", mask = ");
6439 print_hex (jfbits->mask, dump_file);
6440 fprintf (dump_file, "\n");
6445 /* Look up all VR information that we have discovered and copy it over
6446 to the transformation summary. */
6448 static void
6449 ipcp_store_vr_results (void)
6451 cgraph_node *node;
6453 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6455 ipa_node_params *info = ipa_node_params_sum->get (node);
6456 bool found_useful_result = false;
6458 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6460 if (dump_file)
6461 fprintf (dump_file, "Not considering %s for VR discovery "
6462 "and propagate; -fipa-ipa-vrp: disabled.\n",
6463 node->dump_name ());
6464 continue;
6467 if (info->ipcp_orig_node)
6468 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6469 if (!info->lattices)
6470 /* Newly expanded artificial thunks do not have lattices. */
6471 continue;
6473 unsigned count = ipa_get_param_count (info);
6474 for (unsigned i = 0; i < count; i++)
6476 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6477 if (!plats->m_value_range.bottom_p ()
6478 && !plats->m_value_range.top_p ())
6480 found_useful_result = true;
6481 break;
6484 if (!found_useful_result)
6485 continue;
6487 ipcp_transformation_initialize ();
6488 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6489 vec_safe_reserve_exact (ts->m_vr, count);
6491 for (unsigned i = 0; i < count; i++)
6493 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6494 ipa_vr vr;
6496 if (!plats->m_value_range.bottom_p ()
6497 && !plats->m_value_range.top_p ()
6498 && dbg_cnt (ipa_cp_vr))
6500 vr.known = true;
6501 vr.type = plats->m_value_range.m_vr.kind ();
6502 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6503 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6505 else
6507 vr.known = false;
6508 vr.type = VR_VARYING;
6509 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6511 ts->m_vr->quick_push (vr);
6516 /* The IPCP driver. */
6518 static unsigned int
6519 ipcp_driver (void)
6521 class ipa_topo_info topo;
6523 if (edge_clone_summaries == NULL)
6524 edge_clone_summaries = new edge_clone_summary_t (symtab);
6526 ipa_check_create_node_params ();
6527 ipa_check_create_edge_args ();
6528 clone_num_suffixes = new hash_map<const char *, unsigned>;
6530 if (dump_file)
6532 fprintf (dump_file, "\nIPA structures before propagation:\n");
6533 if (dump_flags & TDF_DETAILS)
6534 ipa_print_all_params (dump_file);
6535 ipa_print_all_jump_functions (dump_file);
6538 /* Topological sort. */
6539 build_toporder_info (&topo);
6540 /* Do the interprocedural propagation. */
6541 ipcp_propagate_stage (&topo);
6542 /* Decide what constant propagation and cloning should be performed. */
6543 ipcp_decision_stage (&topo);
6544 /* Store results of bits propagation. */
6545 ipcp_store_bits_results ();
6546 /* Store results of value range propagation. */
6547 ipcp_store_vr_results ();
6549 /* Free all IPCP structures. */
6550 delete clone_num_suffixes;
6551 free_toporder_info (&topo);
6552 delete edge_clone_summaries;
6553 edge_clone_summaries = NULL;
6554 ipa_free_all_structures_after_ipa_cp ();
6555 if (dump_file)
6556 fprintf (dump_file, "\nIPA constant propagation end\n");
6557 return 0;
6560 /* Initialization and computation of IPCP data structures. This is the initial
6561 intraprocedural analysis of functions, which gathers information to be
6562 propagated later on. */
6564 static void
6565 ipcp_generate_summary (void)
6567 struct cgraph_node *node;
6569 if (dump_file)
6570 fprintf (dump_file, "\nIPA constant propagation start:\n");
6571 ipa_register_cgraph_hooks ();
6573 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6574 ipa_analyze_node (node);
6577 namespace {
6579 const pass_data pass_data_ipa_cp =
6581 IPA_PASS, /* type */
6582 "cp", /* name */
6583 OPTGROUP_NONE, /* optinfo_flags */
6584 TV_IPA_CONSTANT_PROP, /* tv_id */
6585 0, /* properties_required */
6586 0, /* properties_provided */
6587 0, /* properties_destroyed */
6588 0, /* todo_flags_start */
6589 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6592 class pass_ipa_cp : public ipa_opt_pass_d
6594 public:
6595 pass_ipa_cp (gcc::context *ctxt)
6596 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6597 ipcp_generate_summary, /* generate_summary */
6598 NULL, /* write_summary */
6599 NULL, /* read_summary */
6600 ipcp_write_transformation_summaries, /*
6601 write_optimization_summary */
6602 ipcp_read_transformation_summaries, /*
6603 read_optimization_summary */
6604 NULL, /* stmt_fixup */
6605 0, /* function_transform_todo_flags_start */
6606 ipcp_transform_function, /* function_transform */
6607 NULL) /* variable_transform */
6610 /* opt_pass methods: */
6611 virtual bool gate (function *)
6613 /* FIXME: We should remove the optimize check after we ensure we never run
6614 IPA passes when not optimizing. */
6615 return (flag_ipa_cp && optimize) || in_lto_p;
6618 virtual unsigned int execute (function *) { return ipcp_driver (); }
6620 }; // class pass_ipa_cp
6622 } // anon namespace
6624 ipa_opt_pass_d *
6625 make_pass_ipa_cp (gcc::context *ctxt)
6627 return new pass_ipa_cp (ctxt);
6630 /* Reset all state within ipa-cp.c so that we can rerun the compiler
6631 within the same process. For use by toplev::finalize. */
6633 void
6634 ipa_cp_c_finalize (void)
6636 base_count = profile_count::uninitialized ();
6637 overall_size = 0;
6638 orig_overall_size = 0;
6639 ipcp_free_transformation_sum ();