Revise -mdisable-fpregs option and add new -msoft-mult option
[official-gcc.git] / gcc / ipa-cp.c
blobb987d975793392903e6e06649305092acdddde14
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
34 is deemed good.
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 third stage.
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.c
100 and tree-inline.c) according to instructions inserted to the call graph by
101 the second stage. */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "tree.h"
108 #include "gimple-expr.h"
109 #include "gimple.h"
110 #include "predict.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
126 #include "attribs.h"
127 #include "dbgcnt.h"
128 #include "symtab-clones.h"
130 template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value.  A value may flow into a
   lattice from several call edges; one instance of this struct records one
   such origin.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  Holds the cost/benefit
   estimates used when deciding whether specializing for a value pays off.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  /* Start with all benefits and costs zeroed; they are accumulated during the
     propagation stage.  */
  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two chains of self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  /* Record that the value arrived over edge CS, derived from SRC_VAL which is
     the SRC_IDX-th parameter of the caller, at aggregate offset OFFSET
     (negative for a scalar).  */
  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true, if a this value has been generated for a self-recursive call
     as a result of an arithmetic pass-through jump-function acting on a value
     in the same lattice function.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  /* Add NEWVAL to the lattice; see the definition for the meaning of the
     optional source-tracking arguments.  */
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  /* Lattice state queries: VARYING is bottom, UNDEFINED is top.  */
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  /* Meet with another bits lattice, adjusted by an arithmetic operation
     described by the tree_code and operand.  */
  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree);

  /* Meet with a constant <value, mask> pair of the given precision.  */
  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  TOP is an undefined range, BOTTOM is a varying
   one; anything in between is a usable range for propagation.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  /* A freshly constructed lattice must still be TOP (undefined).  */
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Maximal count found in program.  */

static profile_count max_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
414 /* Return the param lattices structure corresponding to the Ith formal
415 parameter of the function described by INFO. */
416 static inline class ipcp_param_lattices *
417 ipa_get_parm_lattices (class ipa_node_params *info, int i)
419 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
420 gcc_checking_assert (!info->ipcp_orig_node);
421 gcc_checking_assert (info->lattices);
422 return &(info->lattices[i]);
425 /* Return the lattice corresponding to the scalar value of the Ith formal
426 parameter of the function described by INFO. */
427 static inline ipcp_lattice<tree> *
428 ipa_get_scalar_lat (class ipa_node_params *info, int i)
430 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
431 return &plats->itself;
434 /* Return the lattice corresponding to the scalar value of the Ith formal
435 parameter of the function described by INFO. */
436 static inline ipcp_lattice<ipa_polymorphic_call_context> *
437 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
439 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
440 return &plats->ctxlat;
443 /* Return whether LAT is a lattice with a single constant and without an
444 undefined value. */
446 template <typename valtype>
447 inline bool
448 ipcp_lattice<valtype>::is_single_const ()
450 if (bottom || contains_variable || values_count != 1)
451 return false;
452 else
453 return true;
456 /* Print V which is extracted from a value in a lattice to F. */
458 static void
459 print_ipcp_constant_value (FILE * f, tree v)
461 if (TREE_CODE (v) == ADDR_EXPR
462 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
464 fprintf (f, "& ");
465 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
467 else
468 print_generic_expr (f, v);
/* Print V which is extracted from a value in a lattice to F.  Overload for
   polymorphic call contexts; delegates to the context's own dumper.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}
479 /* Print a lattice LAT to F. */
481 template <typename valtype>
482 void
483 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
485 ipcp_value<valtype> *val;
486 bool prev = false;
488 if (bottom)
490 fprintf (f, "BOTTOM\n");
491 return;
494 if (!values_count && !contains_variable)
496 fprintf (f, "TOP\n");
497 return;
500 if (contains_variable)
502 fprintf (f, "VARIABLE");
503 prev = true;
504 if (dump_benefits)
505 fprintf (f, "\n");
508 for (val = values; val; val = val->next)
510 if (dump_benefits && prev)
511 fprintf (f, " ");
512 else if (!dump_benefits && prev)
513 fprintf (f, ", ");
514 else
515 prev = true;
517 print_ipcp_constant_value (f, val->value);
519 if (dump_sources)
521 ipcp_value_source<valtype> *s;
523 if (val->self_recursion_generated_p ())
524 fprintf (f, " [self_gen(%i), from:",
525 val->self_recursion_generated_level);
526 else
527 fprintf (f, " [scc: %i, from:", val->scc_no);
528 for (s = val->sources; s; s = s->next)
529 fprintf (f, " %i(%f)", s->cs->caller->order,
530 s->cs->sreal_frequency ().to_double ());
531 fprintf (f, "]");
534 if (dump_benefits)
535 fprintf (f, " [loc_time: %g, loc_size: %i, "
536 "prop_time: %g, prop_size: %i]\n",
537 val->local_time_benefit.to_double (), val->local_size_cost,
538 val->prop_time_benefit.to_double (), val->prop_size_cost);
540 if (!dump_benefits)
541 fprintf (f, "\n");
544 void
545 ipcp_bits_lattice::print (FILE *f)
547 if (top_p ())
548 fprintf (f, " Bits unknown (TOP)\n");
549 else if (bottom_p ())
550 fprintf (f, " Bits unusable (BOTTOM)\n");
551 else
553 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
554 fprintf (f, ", mask = "); print_hex (get_mask (), f);
555 fprintf (f, "\n");
/* Print value range lattice to F.  Delegates to the generic value_range
   dumper.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}
/* Print all ipcp_lattices of all functions to F.  For each analyzed node,
   dump the scalar, context, bits, value-range and aggregate lattices of every
   formal parameter.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, " param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, " ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, " ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, " virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, " AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, " AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  The first matching reason in the chain below wins; REASON stays
   NULL iff the node is versionable.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  /* Aliases/thunks were rejected silently above; don't spam the dump file
     about them.  */
  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
690 /* Return true if it is at all technically possible to create clones of a
691 NODE. */
693 static bool
694 ipcp_versionable_function_p (struct cgraph_node *node)
696 ipa_node_params *info = ipa_node_params_sum->get (node);
697 return info && info->versionable;
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* Sum of IPA profile counts of all non-thunk callers.  */
  profile_count count_sum;
  /* Sum of call frequencies of all non-thunk callers.  */
  sreal freq_sum;
  /* Number of non-thunk calls and number of those considered hot.  */
  int n_calls, n_hot_calls;
};
709 /* Initialize fields of STAT to zeroes. */
711 static inline void
712 init_caller_stats (struct caller_statistics *stats)
714 stats->count_sum = profile_count::zero ();
715 stats->n_calls = 0;
716 stats->n_hot_calls = 0;
717 stats->freq_sum = 0;
720 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
721 non-thunk incoming edges to NODE. */
723 static bool
724 gather_caller_stats (struct cgraph_node *node, void *data)
726 struct caller_statistics *stats = (struct caller_statistics *) data;
727 struct cgraph_edge *cs;
729 for (cs = node->callers; cs; cs = cs->next_caller)
730 if (!cs->caller->thunk)
732 if (cs->count.ipa ().initialized_p ())
733 stats->count_sum += cs->count.ipa ();
734 stats->freq_sum += cs->sreal_frequency ();
735 stats->n_calls++;
736 if (cs->maybe_hot_p ())
737 stats->n_hot_calls ++;
739 return false;
/* Return true if this NODE is viable candidate for cloning.  A node is a
   candidate when cloning is enabled, the node is not size-optimized, and
   either cloning might shrink the code, the node is hot per profile, or it
   has at least one hot call.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  /* If the function is smaller than one call site, cloning can only help.  */
  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seems cold; constant propagation can improve function's speed
     significantly.  */
  if (max_count > profile_count::zero ())
    {
      /* Hot if callers account for more than 90% of the node's count.  */
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}
/* Bookkeeping for topologically sorting IPA-CP values (Tarjan SCC over the
   value dependency graph).  */

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  /* Push CUR_VAL (and everything it depends on) into the topological
     order.  */
  void add_val (ipcp_value<valtype> *cur_val);
  /* Propagate cumulated benefits from dependent values to their sources.  */
  void propagate_effects ();
};
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  /* Number of nodes in ORDER and current top-of-stack index.  */
  int nnodes, stack_top;

  /* Value toposort state for constants and for polymorphic contexts.  */
  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};
848 /* Skip edges from and to nodes without ipa_cp enabled.
849 Ignore not available symbols. */
851 static bool
852 ignore_edge_p (cgraph_edge *e)
854 enum availability avail;
855 cgraph_node *ultimate_target
856 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
858 return (avail <= AVAIL_INTERPOSABLE
859 || !opt_for_fn (ultimate_target->decl, optimize)
860 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
863 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
865 static void
866 build_toporder_info (class ipa_topo_info *topo)
868 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
869 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
871 gcc_checking_assert (topo->stack_top == 0);
872 topo->nnodes = ipa_reduced_postorder (topo->order, true,
873 ignore_edge_p);
876 /* Free information about strongly connected components and the arrays in
877 TOPO. */
879 static void
880 free_toporder_info (class ipa_topo_info *topo)
882 ipa_free_postorder_info ();
883 free (topo->order);
884 free (topo->stack);
887 /* Add NODE to the stack in TOPO, unless it is already there. */
889 static inline void
890 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
892 ipa_node_params *info = ipa_node_params_sum->get (node);
893 if (info->node_enqueued)
894 return;
895 info->node_enqueued = 1;
896 topo->stack[topo->stack_top++] = node;
899 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
900 is empty. */
902 static struct cgraph_node *
903 pop_node_from_stack (class ipa_topo_info *topo)
905 if (topo->stack_top)
907 struct cgraph_node *node;
908 topo->stack_top--;
909 node = topo->stack[topo->stack_top];
910 ipa_node_params_sum->get (node)->node_enqueued = 0;
911 return node;
913 else
914 return NULL;
917 /* Set lattice LAT to bottom and return true if it previously was not set as
918 such. */
920 template <typename valtype>
921 inline bool
922 ipcp_lattice<valtype>::set_to_bottom ()
924 bool ret = !bottom;
925 bottom = true;
926 return ret;
929 /* Mark lattice as containing an unknown value and return true if it previously
930 was not marked as such. */
932 template <typename valtype>
933 inline bool
934 ipcp_lattice<valtype>::set_contains_variable ()
936 bool ret = !contains_variable;
937 contains_variable = true;
938 return ret;
941 /* Set all aggregate lattices in PLATS to bottom and return true if they were
942 not previously set as such. */
944 static inline bool
945 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
947 bool ret = !plats->aggs_bottom;
948 plats->aggs_bottom = true;
949 return ret;
952 /* Mark all aggregate lattices in PLATS as containing an unknown value and
953 return true if they were not previously marked as such. */
955 static inline bool
956 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
958 bool ret = !plats->aggs_contain_variable;
959 plats->aggs_contain_variable = true;
960 return ret;
/* Meet the current value of the lattice with the range held by OTHER.
   Return true if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}
/* Meet the current value of the lattice with value range described by VR
   lattice.  Return true if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}
/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  /* Bottom absorbs everything.  */
  if (bottom_p ())
    return false;

  /* Meeting with varying drops us to bottom.  */
  if (other_vr->varying_p ())
    return set_to_bottom ();

  /* Otherwise the meet is the union of the two ranges; report whether the
     union actually grew.  */
  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}
/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}
/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}
/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}
1029 /* Set lattice value to bottom, if it already isn't the case. */
1031 bool
1032 ipcp_bits_lattice::set_to_bottom ()
1034 if (bottom_p ())
1035 return false;
1036 m_lattice_val = IPA_BITS_VARYING;
1037 m_value = 0;
1038 m_mask = -1;
1039 return true;
1042 /* Set to constant if it isn't already. Only meant to be called
1043 when switching state from TOP. */
1045 bool
1046 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1048 gcc_assert (top_p ());
1049 m_lattice_val = IPA_BITS_CONSTANT;
1050 m_value = wi::bit_and (wi::bit_not (mask), value);
1051 m_mask = mask;
1052 return true;
1055 /* Convert operand to value, mask form. */
1057 void
1058 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1060 wide_int get_nonzero_bits (const_tree);
1062 if (TREE_CODE (operand) == INTEGER_CST)
1064 *valuep = wi::to_widest (operand);
1065 *maskp = 0;
1067 else
1069 *valuep = 0;
1070 *maskp = -1;
1074 /* Meet operation, similar to ccp_lattice_meet, we xor values
1075 if this->value, value have different values at same bit positions, we want
1076 to drop that bit to varying. Return true if mask is changed.
1077 This function assumes that the lattice value is in CONSTANT state */
1079 bool
1080 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1081 unsigned precision)
1083 gcc_assert (constant_p ());
1085 widest_int old_mask = m_mask;
1086 m_mask = (m_mask | mask) | (m_value ^ value);
1087 m_value &= ~m_mask;
1089 if (wi::sext (m_mask, precision) == -1)
1090 return set_to_bottom ();
1092 return m_mask != old_mask;
1095 /* Meet the bits lattice with operand
1096 described by <value, mask, sgn, precision. */
1098 bool
1099 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1100 unsigned precision)
1102 if (bottom_p ())
1103 return false;
1105 if (top_p ())
1107 if (wi::sext (mask, precision) == -1)
1108 return set_to_bottom ();
1109 return set_to_constant (value, mask);
1112 return meet_with_1 (value, mask, precision);
1115 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1116 if code is binary operation or bit_value_unop (other) if code is unary op.
1117 In the case when code is nop_expr, no adjustment is required. */
1119 bool
1120 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1121 signop sgn, enum tree_code code, tree operand)
1123 if (other.bottom_p ())
1124 return set_to_bottom ();
1126 if (bottom_p () || other.top_p ())
1127 return false;
1129 widest_int adjusted_value, adjusted_mask;
1131 if (TREE_CODE_CLASS (code) == tcc_binary)
1133 tree type = TREE_TYPE (operand);
1134 widest_int o_value, o_mask;
1135 get_value_and_mask (operand, &o_value, &o_mask);
1137 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1138 sgn, precision, other.get_value (), other.get_mask (),
1139 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1141 if (wi::sext (adjusted_mask, precision) == -1)
1142 return set_to_bottom ();
1145 else if (TREE_CODE_CLASS (code) == tcc_unary)
1147 bit_value_unop (code, sgn, precision, &adjusted_value,
1148 &adjusted_mask, sgn, precision, other.get_value (),
1149 other.get_mask ());
1151 if (wi::sext (adjusted_mask, precision) == -1)
1152 return set_to_bottom ();
1155 else
1156 return set_to_bottom ();
1158 if (top_p ())
1160 if (wi::sext (adjusted_mask, precision) == -1)
1161 return set_to_bottom ();
1162 return set_to_constant (adjusted_value, adjusted_mask);
1164 else
1165 return meet_with_1 (adjusted_value, adjusted_mask, precision);
1168 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1169 return true is any of them has not been marked as such so far. */
1171 static inline bool
1172 set_all_contains_variable (class ipcp_param_lattices *plats)
1174 bool ret;
1175 ret = plats->itself.set_contains_variable ();
1176 ret |= plats->ctxlat.set_contains_variable ();
1177 ret |= set_agg_lats_contain_variable (plats);
1178 ret |= plats->bits_lattice.set_to_bottom ();
1179 ret |= plats->m_value_range.set_to_bottom ();
1180 return ret;
1183 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1184 points to by the number of callers to NODE. */
1186 static bool
1187 count_callers (cgraph_node *node, void *data)
1189 int *caller_count = (int *) data;
1191 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1192 /* Local thunks can be handled transparently, but if the thunk cannot
1193 be optimized out, count it as a real use. */
1194 if (!cs->caller->thunk || !cs->caller->local)
1195 ++*caller_count;
1196 return false;
1199 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1200 the one caller of some other node. Set the caller's corresponding flag. */
1202 static bool
1203 set_single_call_flag (cgraph_node *node, void *)
1205 cgraph_edge *cs = node->callers;
1206 /* Local thunks can be handled transparently, skip them. */
1207 while (cs && cs->caller->thunk && cs->caller->local)
1208 cs = cs->next_caller;
1209 if (cs)
1210 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1212 info->node_calling_single_call = true;
1213 return true;
1215 return false;
1218 /* Initialize ipcp_lattices. */
1220 static void
1221 initialize_node_lattices (struct cgraph_node *node)
1223 ipa_node_params *info = ipa_node_params_sum->get (node);
1224 struct cgraph_edge *ie;
1225 bool disable = false, variable = false;
1226 int i;
1228 gcc_checking_assert (node->has_gimple_body_p ());
1230 if (!ipa_get_param_count (info))
1231 disable = true;
1232 else if (node->local)
1234 int caller_count = 0;
1235 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1236 true);
1237 gcc_checking_assert (caller_count > 0);
1238 if (caller_count == 1)
1239 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1240 NULL, true);
1242 else
1244 /* When cloning is allowed, we can assume that externally visible
1245 functions are not called. We will compensate this by cloning
1246 later. */
1247 if (ipcp_versionable_function_p (node)
1248 && ipcp_cloning_candidate_p (node))
1249 variable = true;
1250 else
1251 disable = true;
1254 if (dump_file && (dump_flags & TDF_DETAILS)
1255 && !node->alias && !node->thunk)
1257 fprintf (dump_file, "Initializing lattices of %s\n",
1258 node->dump_name ());
1259 if (disable || variable)
1260 fprintf (dump_file, " Marking all lattices as %s\n",
1261 disable ? "BOTTOM" : "VARIABLE");
1264 auto_vec<bool, 16> surviving_params;
1265 bool pre_modified = false;
1267 clone_info *cinfo = clone_info::get (node);
1269 if (!disable && cinfo && cinfo->param_adjustments)
1271 /* At the moment all IPA optimizations should use the number of
1272 parameters of the prevailing decl as the m_always_copy_start.
1273 Handling any other value would complicate the code below, so for the
1274 time bing let's only assert it is so. */
1275 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1276 == ipa_get_param_count (info))
1277 || cinfo->param_adjustments->m_always_copy_start < 0);
1279 pre_modified = true;
1280 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1282 if (dump_file && (dump_flags & TDF_DETAILS)
1283 && !node->alias && !node->thunk)
1285 bool first = true;
1286 for (int j = 0; j < ipa_get_param_count (info); j++)
1288 if (j < (int) surviving_params.length ()
1289 && surviving_params[j])
1290 continue;
1291 if (first)
1293 fprintf (dump_file,
1294 " The following parameters are dead on arrival:");
1295 first = false;
1297 fprintf (dump_file, " %u", j);
1299 if (!first)
1300 fprintf (dump_file, "\n");
1304 for (i = 0; i < ipa_get_param_count (info); i++)
1306 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1307 if (disable
1308 || !ipa_get_type (info, i)
1309 || (pre_modified && (surviving_params.length () <= (unsigned) i
1310 || !surviving_params[i])))
1312 plats->itself.set_to_bottom ();
1313 plats->ctxlat.set_to_bottom ();
1314 set_agg_lats_to_bottom (plats);
1315 plats->bits_lattice.set_to_bottom ();
1316 plats->m_value_range.m_vr = value_range ();
1317 plats->m_value_range.set_to_bottom ();
1319 else
1321 plats->m_value_range.init ();
1322 if (variable)
1323 set_all_contains_variable (plats);
1327 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1328 if (ie->indirect_info->polymorphic
1329 && ie->indirect_info->param_index >= 0)
1331 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1332 ipa_get_parm_lattices (info,
1333 ie->indirect_info->param_index)->virt_call = 1;
1337 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1338 PARAM_TYPE. */
1340 static bool
1341 ipacp_value_safe_for_type (tree param_type, tree value)
1343 tree val_type = TREE_TYPE (value);
1344 if (param_type == val_type
1345 || useless_type_conversion_p (param_type, val_type)
1346 || fold_convertible_p (param_type, value))
1347 return true;
1348 else
1349 return false;
1352 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1354 static bool
1355 values_equal_for_ipcp_p (tree x, tree y)
1357 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1359 if (x == y)
1360 return true;
1362 if (TREE_CODE (x) == ADDR_EXPR
1363 && TREE_CODE (y) == ADDR_EXPR
1364 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1365 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1366 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1367 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1368 else
1369 return operand_equal_p (x, y, 0);
1372 /* Return the result of a (possibly arithmetic) operation on the constant
1373 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1374 the type of the parameter to which the result is passed. Return
1375 NULL_TREE if that cannot be determined or be considered an
1376 interprocedural invariant. */
1378 static tree
1379 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1380 tree res_type)
1382 tree res;
1384 if (opcode == NOP_EXPR)
1385 return input;
1386 if (!is_gimple_ip_invariant (input))
1387 return NULL_TREE;
1389 if (opcode == ASSERT_EXPR)
1391 if (values_equal_for_ipcp_p (input, operand))
1392 return input;
1393 else
1394 return NULL_TREE;
1397 if (!res_type)
1399 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1400 res_type = boolean_type_node;
1401 else if (expr_type_first_operand_type_p (opcode))
1402 res_type = TREE_TYPE (input);
1403 else
1404 return NULL_TREE;
1407 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1408 res = fold_unary (opcode, res_type, input);
1409 else
1410 res = fold_binary (opcode, res_type, input, operand);
1412 if (res && !is_gimple_ip_invariant (res))
1413 return NULL_TREE;
1415 return res;
1418 /* Return the result of a (possibly arithmetic) pass through jump function
1419 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1420 to which the result is passed. Return NULL_TREE if that cannot be
1421 determined or be considered an interprocedural invariant. */
1423 static tree
1424 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1425 tree res_type)
1427 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1428 input,
1429 ipa_get_jf_pass_through_operand (jfunc),
1430 res_type);
1433 /* Return the result of an ancestor jump function JFUNC on the constant value
1434 INPUT. Return NULL_TREE if that cannot be determined. */
1436 static tree
1437 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1439 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1440 if (TREE_CODE (input) == ADDR_EXPR)
1442 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1443 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1444 if (known_eq (off, 0))
1445 return input;
1446 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1447 return build1 (ADDR_EXPR, TREE_TYPE (input),
1448 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1449 build_int_cst (ptr_type_node, byte_offset)));
1451 else
1452 return NULL_TREE;
1455 /* Determine whether JFUNC evaluates to a single known constant value and if
1456 so, return it. Otherwise return NULL. INFO describes the caller node or
1457 the one it is inlined to, so that pass-through jump functions can be
1458 evaluated. PARM_TYPE is the type of the parameter to which the result is
1459 passed. */
1461 tree
1462 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1463 tree parm_type)
1465 if (jfunc->type == IPA_JF_CONST)
1466 return ipa_get_jf_constant (jfunc);
1467 else if (jfunc->type == IPA_JF_PASS_THROUGH
1468 || jfunc->type == IPA_JF_ANCESTOR)
1470 tree input;
1471 int idx;
1473 if (jfunc->type == IPA_JF_PASS_THROUGH)
1474 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1475 else
1476 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1478 if (info->ipcp_orig_node)
1479 input = info->known_csts[idx];
1480 else
1482 ipcp_lattice<tree> *lat;
1484 if (!info->lattices
1485 || idx >= ipa_get_param_count (info))
1486 return NULL_TREE;
1487 lat = ipa_get_scalar_lat (info, idx);
1488 if (!lat->is_single_const ())
1489 return NULL_TREE;
1490 input = lat->values->value;
1493 if (!input)
1494 return NULL_TREE;
1496 if (jfunc->type == IPA_JF_PASS_THROUGH)
1497 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1498 else
1499 return ipa_get_jf_ancestor_result (jfunc, input);
1501 else
1502 return NULL_TREE;
1505 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1506 that INFO describes the caller node or the one it is inlined to, CS is the
1507 call graph edge corresponding to JFUNC and CSIDX index of the described
1508 parameter. */
1510 ipa_polymorphic_call_context
1511 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1512 ipa_jump_func *jfunc)
1514 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1515 ipa_polymorphic_call_context ctx;
1516 ipa_polymorphic_call_context *edge_ctx
1517 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1519 if (edge_ctx && !edge_ctx->useless_p ())
1520 ctx = *edge_ctx;
1522 if (jfunc->type == IPA_JF_PASS_THROUGH
1523 || jfunc->type == IPA_JF_ANCESTOR)
1525 ipa_polymorphic_call_context srcctx;
1526 int srcidx;
1527 bool type_preserved = true;
1528 if (jfunc->type == IPA_JF_PASS_THROUGH)
1530 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1531 return ctx;
1532 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1533 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1535 else
1537 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1538 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1540 if (info->ipcp_orig_node)
1542 if (info->known_contexts.exists ())
1543 srcctx = info->known_contexts[srcidx];
1545 else
1547 if (!info->lattices
1548 || srcidx >= ipa_get_param_count (info))
1549 return ctx;
1550 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1551 lat = ipa_get_poly_ctx_lat (info, srcidx);
1552 if (!lat->is_single_const ())
1553 return ctx;
1554 srcctx = lat->values->value;
1556 if (srcctx.useless_p ())
1557 return ctx;
1558 if (jfunc->type == IPA_JF_ANCESTOR)
1559 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1560 if (!type_preserved)
1561 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1562 srcctx.combine_with (ctx);
1563 return srcctx;
1566 return ctx;
1569 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1570 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1571 the result is a range or an anti-range. */
1573 static bool
1574 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1575 value_range *src_vr,
1576 enum tree_code operation,
1577 tree dst_type, tree src_type)
1579 range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1580 if (dst_vr->varying_p () || dst_vr->undefined_p ())
1581 return false;
1582 return true;
1585 /* Determine value_range of JFUNC given that INFO describes the caller node or
1586 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1587 and PARM_TYPE of the parameter. */
1589 value_range
1590 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1591 ipa_jump_func *jfunc, tree parm_type)
1593 value_range vr;
1594 return vr;
1595 if (jfunc->m_vr)
1596 ipa_vr_operation_and_type_effects (&vr,
1597 jfunc->m_vr,
1598 NOP_EXPR, parm_type,
1599 jfunc->m_vr->type ());
1600 if (vr.singleton_p ())
1601 return vr;
1602 if (jfunc->type == IPA_JF_PASS_THROUGH)
1604 int idx;
1605 ipcp_transformation *sum
1606 = ipcp_get_transformation_summary (cs->caller->inlined_to
1607 ? cs->caller->inlined_to
1608 : cs->caller);
1609 if (!sum || !sum->m_vr)
1610 return vr;
1612 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1614 if (!(*sum->m_vr)[idx].known)
1615 return vr;
1616 tree vr_type = ipa_get_type (info, idx);
1617 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1618 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1619 (*sum->m_vr)[idx].type);
1621 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1623 if (TREE_CODE_CLASS (operation) == tcc_unary)
1625 value_range res;
1627 if (ipa_vr_operation_and_type_effects (&res,
1628 &srcvr,
1629 operation, parm_type,
1630 vr_type))
1631 vr.intersect (res);
1633 else
1635 value_range op_res, res;
1636 tree op = ipa_get_jf_pass_through_operand (jfunc);
1637 value_range op_vr (op, op);
1639 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1640 if (ipa_vr_operation_and_type_effects (&res,
1641 &op_res,
1642 NOP_EXPR, parm_type,
1643 vr_type))
1644 vr.intersect (res);
1647 return vr;
1650 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1651 parameter with the given INDEX. */
1653 static tree
1654 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1655 int index)
1657 struct ipa_agg_replacement_value *aggval;
1659 aggval = ipa_get_agg_replacements_for_node (node);
1660 while (aggval)
1662 if (aggval->offset == offset
1663 && aggval->index == index)
1664 return aggval->value;
1665 aggval = aggval->next;
1667 return NULL_TREE;
1670 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1671 single known constant value and if so, return it. Otherwise return NULL.
1672 NODE and INFO describes the caller node or the one it is inlined to, and
1673 its related info. */
1675 static tree
1676 ipa_agg_value_from_node (class ipa_node_params *info,
1677 struct cgraph_node *node,
1678 struct ipa_agg_jf_item *item)
1680 tree value = NULL_TREE;
1681 int src_idx;
1683 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1684 return NULL_TREE;
1686 if (item->jftype == IPA_JF_CONST)
1687 return item->value.constant;
1689 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1690 || item->jftype == IPA_JF_LOAD_AGG);
1692 src_idx = item->value.pass_through.formal_id;
1694 if (info->ipcp_orig_node)
1696 if (item->jftype == IPA_JF_PASS_THROUGH)
1697 value = info->known_csts[src_idx];
1698 else
1699 value = get_clone_agg_value (node, item->value.load_agg.offset,
1700 src_idx);
1702 else if (info->lattices)
1704 class ipcp_param_lattices *src_plats
1705 = ipa_get_parm_lattices (info, src_idx);
1707 if (item->jftype == IPA_JF_PASS_THROUGH)
1709 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1711 if (!lat->is_single_const ())
1712 return NULL_TREE;
1714 value = lat->values->value;
1716 else if (src_plats->aggs
1717 && !src_plats->aggs_bottom
1718 && !src_plats->aggs_contain_variable
1719 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1721 struct ipcp_agg_lattice *aglat;
1723 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1725 if (aglat->offset > item->value.load_agg.offset)
1726 break;
1728 if (aglat->offset == item->value.load_agg.offset)
1730 if (aglat->is_single_const ())
1731 value = aglat->values->value;
1732 break;
1738 if (!value)
1739 return NULL_TREE;
1741 if (item->jftype == IPA_JF_LOAD_AGG)
1743 tree load_type = item->value.load_agg.type;
1744 tree value_type = TREE_TYPE (value);
1746 /* Ensure value type is compatible with load type. */
1747 if (!useless_type_conversion_p (load_type, value_type))
1748 return NULL_TREE;
1751 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1752 value,
1753 item->value.pass_through.operand,
1754 item->type);
1757 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1758 an aggregate and if so, return it. Otherwise return an empty set. NODE
1759 and INFO describes the caller node or the one it is inlined to, and its
1760 related info. */
1762 struct ipa_agg_value_set
1763 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1764 struct ipa_agg_jump_function *agg_jfunc)
1766 struct ipa_agg_value_set agg;
1767 struct ipa_agg_jf_item *item;
1768 int i;
1770 agg.items = vNULL;
1771 agg.by_ref = agg_jfunc->by_ref;
1773 FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1775 tree value = ipa_agg_value_from_node (info, node, item);
1777 if (value)
1779 struct ipa_agg_value value_item;
1781 value_item.offset = item->offset;
1782 value_item.value = value;
1784 agg.items.safe_push (value_item);
1787 return agg;
1790 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1791 bottom, not containing a variable component and without any known value at
1792 the same time. */
1794 DEBUG_FUNCTION void
1795 ipcp_verify_propagated_values (void)
1797 struct cgraph_node *node;
1799 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1801 ipa_node_params *info = ipa_node_params_sum->get (node);
1802 if (!opt_for_fn (node->decl, flag_ipa_cp)
1803 || !opt_for_fn (node->decl, optimize))
1804 continue;
1805 int i, count = ipa_get_param_count (info);
1807 for (i = 0; i < count; i++)
1809 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1811 if (!lat->bottom
1812 && !lat->contains_variable
1813 && lat->values_count == 0)
1815 if (dump_file)
1817 symtab->dump (dump_file);
1818 fprintf (dump_file, "\nIPA lattices after constant "
1819 "propagation, before gcc_unreachable:\n");
1820 print_all_lattices (dump_file, true, false);
1823 gcc_unreachable ();
1829 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1831 static bool
1832 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1833 ipa_polymorphic_call_context y)
1835 return x.equal_to (y);
1839 /* Add a new value source to the value represented by THIS, marking that a
1840 value comes from edge CS and (if the underlying jump function is a
1841 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1842 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1843 scalar value of the parameter itself or the offset within an aggregate. */
1845 template <typename valtype>
1846 void
1847 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1848 int src_idx, HOST_WIDE_INT offset)
1850 ipcp_value_source<valtype> *src;
1852 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1853 src->offset = offset;
1854 src->cs = cs;
1855 src->val = src_val;
1856 src->index = src_idx;
1858 src->next = sources;
1859 sources = src;
1862 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1863 SOURCE and clear all other fields. */
1865 static ipcp_value<tree> *
1866 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1868 ipcp_value<tree> *val;
1870 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1871 val->value = cst;
1872 val->self_recursion_generated_level = same_lat_gen_level;
1873 return val;
1876 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1877 value to SOURCE and clear all other fields. */
1879 static ipcp_value<ipa_polymorphic_call_context> *
1880 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1881 unsigned same_lat_gen_level)
1883 ipcp_value<ipa_polymorphic_call_context> *val;
1885 val = new (ipcp_poly_ctx_values_pool.allocate ())
1886 ipcp_value<ipa_polymorphic_call_context>();
1887 val->value = ctx;
1888 val->self_recursion_generated_level = same_lat_gen_level;
1889 return val;
1892 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1893 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1894 meaning. OFFSET -1 means the source is scalar and not a part of an
1895 aggregate. If non-NULL, VAL_P records address of existing or newly added
1896 ipcp_value.
1898 If the value is generated for a self-recursive call as a result of an
1899 arithmetic pass-through jump-function acting on a value in the same lattice,
1900 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1901 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1903 template <typename valtype>
1904 bool
1905 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1906 ipcp_value<valtype> *src_val,
1907 int src_idx, HOST_WIDE_INT offset,
1908 ipcp_value<valtype> **val_p,
1909 unsigned same_lat_gen_level)
1911 ipcp_value<valtype> *val, *last_val = NULL;
1913 if (val_p)
1914 *val_p = NULL;
1916 if (bottom)
1917 return false;
1919 for (val = values; val; last_val = val, val = val->next)
1920 if (values_equal_for_ipcp_p (val->value, newval))
1922 if (val_p)
1923 *val_p = val;
1925 if (val->self_recursion_generated_level < same_lat_gen_level)
1926 val->self_recursion_generated_level = same_lat_gen_level;
1928 if (ipa_edge_within_scc (cs))
1930 ipcp_value_source<valtype> *s;
1931 for (s = val->sources; s; s = s->next)
1932 if (s->cs == cs && s->val == src_val)
1933 break;
1934 if (s)
1935 return false;
1938 val->add_source (cs, src_val, src_idx, offset);
1939 return false;
1942 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1943 param_ipa_cp_value_list_size))
1945 /* We can only free sources, not the values themselves, because sources
1946 of other values in this SCC might point to them. */
1947 for (val = values; val; val = val->next)
1949 while (val->sources)
1951 ipcp_value_source<valtype> *src = val->sources;
1952 val->sources = src->next;
1953 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
1956 values = NULL;
1957 return set_to_bottom ();
1960 values_count++;
1961 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
1962 val->add_source (cs, src_val, src_idx, offset);
1963 val->next = NULL;
1965 /* Add the new value to end of value list, which can reduce iterations
1966 of propagation stage for recursive function. */
1967 if (last_val)
1968 last_val->next = val;
1969 else
1970 values = val;
1972 if (val_p)
1973 *val_p = val;
1975 return true;
1978 /* A helper function that returns result of operation specified by OPCODE on
1979 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
1980 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
1981 acting as its second operand. If non-NULL, RES_TYPE is expected type of
1982 the result. */
1984 static tree
1985 get_val_across_arith_op (enum tree_code opcode,
1986 tree opnd1_type,
1987 tree opnd2,
1988 ipcp_value<tree> *src_val,
1989 tree res_type)
1991 tree opnd1 = src_val->value;
1993 /* Skip source values that is incompatible with specified type. */
1994 if (opnd1_type
1995 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
1996 return NULL_TREE;
1998 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2001 /* Propagate values through an arithmetic transformation described by a jump
2002 function associated with edge CS, taking values from SRC_LAT and putting
2003 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2004 OPND2 is a constant value if transformation is a binary operation.
2005 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2006 a part of the aggregate. SRC_IDX is the index of the source parameter.
2007 RES_TYPE is the value type of result being propagated into. Return true if
2008 DEST_LAT changed. */
2010 static bool
2011 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2012 enum tree_code opcode,
2013 tree opnd1_type,
2014 tree opnd2,
2015 ipcp_lattice<tree> *src_lat,
2016 ipcp_lattice<tree> *dest_lat,
2017 HOST_WIDE_INT src_offset,
2018 int src_idx,
2019 tree res_type)
2021 ipcp_value<tree> *src_val;
2022 bool ret = false;
2024 /* Due to circular dependencies, propagating within an SCC through arithmetic
2025 transformation would create infinite number of values. But for
2026 self-feeding recursive function, we could allow propagation in a limited
2027 count, and this can enable a simple kind of recursive function versioning.
2028 For other scenario, we would just make lattices bottom. */
2029 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2031 int i;
2033 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2034 param_ipa_cp_max_recursive_depth);
2035 if (src_lat != dest_lat || max_recursive_depth < 1)
2036 return dest_lat->set_contains_variable ();
2038 /* No benefit if recursive execution is in low probability. */
2039 if (cs->sreal_frequency () * 100
2040 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2041 param_ipa_cp_min_recursive_probability))
2042 return dest_lat->set_contains_variable ();
2044 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2046 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2048 /* Now we do not use self-recursively generated value as propagation
2049 source, this is absolutely conservative, but could avoid explosion
2050 of lattice's value space, especially when one recursive function
2051 calls another recursive. */
2052 if (src_val->self_recursion_generated_p ())
2054 ipcp_value_source<tree> *s;
2056 /* If the lattice has already been propagated for the call site,
2057 no need to do that again. */
2058 for (s = src_val->sources; s; s = s->next)
2059 if (s->cs == cs)
2060 return dest_lat->set_contains_variable ();
2062 else
2063 val_seeds.safe_push (src_val);
2066 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2068 /* Recursively generate lattice values with a limited count. */
2069 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2071 for (int j = 1; j < max_recursive_depth; j++)
2073 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2074 src_val, res_type);
2075 if (!cstval
2076 || !ipacp_value_safe_for_type (res_type, cstval))
2077 break;
2079 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2080 src_offset, &src_val, j);
2081 gcc_checking_assert (src_val);
2084 ret |= dest_lat->set_contains_variable ();
2086 else
2087 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2089 /* Now we do not use self-recursively generated value as propagation
2090 source, otherwise it is easy to make value space of normal lattice
2091 overflow. */
2092 if (src_val->self_recursion_generated_p ())
2094 ret |= dest_lat->set_contains_variable ();
2095 continue;
2098 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2099 src_val, res_type);
2100 if (cstval
2101 && ipacp_value_safe_for_type (res_type, cstval))
2102 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2103 src_offset);
2104 else
2105 ret |= dest_lat->set_contains_variable ();
2108 return ret;
2111 /* Propagate values through a pass-through jump function JFUNC associated with
2112 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2113 is the index of the source parameter. PARM_TYPE is the type of the
2114 parameter to which the result is passed. */
2116 static bool
2117 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2118 ipcp_lattice<tree> *src_lat,
2119 ipcp_lattice<tree> *dest_lat, int src_idx,
2120 tree parm_type)
2122 return propagate_vals_across_arith_jfunc (cs,
2123 ipa_get_jf_pass_through_operation (jfunc),
2124 NULL_TREE,
2125 ipa_get_jf_pass_through_operand (jfunc),
2126 src_lat, dest_lat, -1, src_idx, parm_type);
2129 /* Propagate values through an ancestor jump function JFUNC associated with
2130 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2131 is the index of the source parameter. */
2133 static bool
2134 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2135 struct ipa_jump_func *jfunc,
2136 ipcp_lattice<tree> *src_lat,
2137 ipcp_lattice<tree> *dest_lat, int src_idx,
2138 tree param_type)
2140 ipcp_value<tree> *src_val;
2141 bool ret = false;
2143 if (ipa_edge_within_scc (cs))
2144 return dest_lat->set_contains_variable ();
2146 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2148 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2150 if (t && ipacp_value_safe_for_type (param_type, t))
2151 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2152 else
2153 ret |= dest_lat->set_contains_variable ();
2156 return ret;
2159 /* Propagate scalar values across jump function JFUNC that is associated with
2160 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2161 parameter to which the result is passed. */
2163 static bool
2164 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2165 struct ipa_jump_func *jfunc,
2166 ipcp_lattice<tree> *dest_lat,
2167 tree param_type)
/* A bottom destination lattice can never change again.  */
2169 if (dest_lat->bottom)
2170 return false;
2172 if (jfunc->type == IPA_JF_CONST)
/* A constant jump function adds its value directly, provided the value is
   compatible with the formal parameter's type.  */
2174 tree val = ipa_get_jf_constant (jfunc);
2175 if (ipacp_value_safe_for_type (param_type, val))
2176 return dest_lat->add_value (val, cs, NULL, 0);
2177 else
2178 return dest_lat->set_contains_variable ();
2180 else if (jfunc->type == IPA_JF_PASS_THROUGH
2181 || jfunc->type == IPA_JF_ANCESTOR)
2183 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2184 ipcp_lattice<tree> *src_lat;
2185 int src_idx;
2186 bool ret;
/* Both kinds of jump function derive the value from one of the caller's
   formal parameters; fetch its index and scalar lattice.  */
2188 if (jfunc->type == IPA_JF_PASS_THROUGH)
2189 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2190 else
2191 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2193 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2194 if (src_lat->bottom)
2195 return dest_lat->set_contains_variable ();
2197 /* If we would need to clone the caller and cannot, do not propagate. */
2198 if (!ipcp_versionable_function_p (cs->caller)
2199 && (src_lat->contains_variable
2200 || (src_lat->values_count > 1)))
2201 return dest_lat->set_contains_variable ();
2203 if (jfunc->type == IPA_JF_PASS_THROUGH)
2204 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2205 dest_lat, src_idx,
2206 param_type);
2207 else
2208 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2209 src_idx, param_type);
/* Variability in the source lattice always carries over.  */
2211 if (src_lat->contains_variable)
2212 ret |= dest_lat->set_contains_variable ();
2214 return ret;
2217 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2218 use it for indirect inlining), we should propagate them too. */
2219 return dest_lat->set_contains_variable ();
2222 /* Propagate scalar values across jump function JFUNC that is associated with
2223 edge CS and describes argument IDX and put the values into DEST_LAT. */
2225 static bool
2226 propagate_context_across_jump_function (cgraph_edge *cs,
2227 ipa_jump_func *jfunc, int idx,
2228 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2230 if (dest_lat->bottom)
2231 return false;
2232 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2233 bool ret = false;
2234 bool added_sth = false;
2235 bool type_preserved = true;
/* The context recorded directly on the call edge for argument IDX, if any;
   it is combined with every propagated source value below.  */
2237 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2238 = ipa_get_ith_polymorhic_call_context (args, idx);
2240 if (edge_ctx_ptr)
2241 edge_ctx = *edge_ctx_ptr;
2243 if (jfunc->type == IPA_JF_PASS_THROUGH
2244 || jfunc->type == IPA_JF_ANCESTOR)
2246 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2247 int src_idx;
2248 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2250 /* TODO: Once we figure out how to propagate speculations, it will
2251 probably be a good idea to switch to speculation if type_preserved is
2252 not set instead of punting. */
2253 if (jfunc->type == IPA_JF_PASS_THROUGH)
/* Only a simple (NOP) pass-through can carry a context; arithmetic on a
   pointer does not preserve its polymorphic meaning.  */
2255 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2256 goto prop_fail;
2257 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2258 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2260 else
2262 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2263 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2266 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2267 /* If we would need to clone the caller and cannot, do not propagate. */
2268 if (!ipcp_versionable_function_p (cs->caller)
2269 && (src_lat->contains_variable
2270 || (src_lat->values_count > 1)))
2271 goto prop_fail;
2273 ipcp_value<ipa_polymorphic_call_context> *src_val;
2274 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2276 ipa_polymorphic_call_context cur = src_val->value;
/* If the dynamic type may have changed (e.g. placement new), weaken the
   context accordingly before offsetting/combining.  */
2278 if (!type_preserved)
2279 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2280 if (jfunc->type == IPA_JF_ANCESTOR)
2281 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2282 /* TODO: In cases we know how the context is going to be used,
2283 we can improve the result by passing proper OTR_TYPE. */
2284 cur.combine_with (edge_ctx);
2285 if (!cur.useless_p ())
/* A variable source combined with edge info still yields something, but
   only the non-variable part was added, so record variability too.  */
2287 if (src_lat->contains_variable
2288 && !edge_ctx.equal_to (cur))
2289 ret |= dest_lat->set_contains_variable ();
2290 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2291 added_sth = true;
2296 prop_fail:
/* Fall back to whatever the edge itself knows about the argument.  */
2297 if (!added_sth)
2299 if (!edge_ctx.useless_p ())
2300 ret |= dest_lat->add_value (edge_ctx, cs);
2301 else
2302 ret |= dest_lat->set_contains_variable ();
2305 return ret;
2308 /* Propagate bits across jfunc that is associated with
2309 edge cs and update dest_lattice accordingly. */
2311 bool
2312 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2313 ipa_jump_func *jfunc,
2314 ipcp_bits_lattice *dest_lattice)
2316 if (dest_lattice->bottom_p ())
2317 return false;
2319 enum availability availability;
2320 cgraph_node *callee = cs->callee->function_symbol (&availability);
2321 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2322 tree parm_type = ipa_get_type (callee_info, idx);
2324 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2325 transform for these cases. Similarly, we can have bad type mismatches
2326 with LTO, avoid doing anything with those too. */
2327 if (!parm_type
2328 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2330 if (dump_file && (dump_flags & TDF_DETAILS))
2331 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2332 "param %i of %s is NULL or unsuitable for bits propagation\n",
2333 idx, cs->callee->dump_name ());
2335 return dest_lattice->set_to_bottom ();
2338 unsigned precision = TYPE_PRECISION (parm_type);
2339 signop sgn = TYPE_SIGN (parm_type);
2341 if (jfunc->type == IPA_JF_PASS_THROUGH
2342 || jfunc->type == IPA_JF_ANCESTOR)
2344 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2345 tree operand = NULL_TREE;
2346 enum tree_code code;
2347 unsigned src_idx;
2349 if (jfunc->type == IPA_JF_PASS_THROUGH)
2351 code = ipa_get_jf_pass_through_operation (jfunc);
2352 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2353 if (code != NOP_EXPR)
2354 operand = ipa_get_jf_pass_through_operand (jfunc);
2356 else
/* An ancestor jump function is modeled as adding a constant byte offset
   to the source pointer.  */
2358 code = POINTER_PLUS_EXPR;
2359 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2360 unsigned HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2361 operand = build_int_cstu (size_type_node, offset);
2364 class ipcp_param_lattices *src_lats
2365 = ipa_get_parm_lattices (caller_info, src_idx);
2367 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2368 for eg consider:
2369 int f(int x)
2371 g (x & 0xff);
2373 Assume lattice for x is bottom, however we can still propagate
2374 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2375 and we store it in jump function during analysis stage. */
2377 if (src_lats->bits_lattice.bottom_p ()
2378 && jfunc->bits)
2379 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2380 precision);
2381 else
2382 return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
2383 code, operand);
/* An ancestor without the pass-through/ancestor handling above reaching it
   cannot be represented; likewise a jump function with no bits info.  */
2386 else if (jfunc->type == IPA_JF_ANCESTOR)
2387 return dest_lattice->set_to_bottom ();
2388 else if (jfunc->bits)
2389 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2390 precision);
2391 else
2392 return dest_lattice->set_to_bottom ();
2395 /* Propagate value range across jump function JFUNC that is associated with
2396 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2397 accordingly. */
2399 static bool
2400 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2401 class ipcp_param_lattices *dest_plats,
2402 tree param_type)
2404 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2406 if (dest_lat->bottom_p ())
2407 return false;
/* Value ranges are only tracked for integral and pointer parameters.  */
2409 if (!param_type
2410 || (!INTEGRAL_TYPE_P (param_type)
2411 && !POINTER_TYPE_P (param_type)))
2412 return dest_lat->set_to_bottom ();
2414 if (jfunc->type == IPA_JF_PASS_THROUGH)
2416 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2417 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2418 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2419 class ipcp_param_lattices *src_lats
2420 = ipa_get_parm_lattices (caller_info, src_idx);
2421 tree operand_type = ipa_get_type (caller_info, src_idx);
2423 if (src_lats->m_value_range.bottom_p ())
2424 return dest_lat->set_to_bottom ();
2426 value_range vr;
/* Unary operations fold the source range directly into the parameter's
   type; binary ones fold against the constant operand first.  */
2427 if (TREE_CODE_CLASS (operation) == tcc_unary)
2428 ipa_vr_operation_and_type_effects (&vr,
2429 &src_lats->m_value_range.m_vr,
2430 operation, param_type,
2431 operand_type);
2432 /* A crude way to prevent unbounded number of value range updates
2433 in SCC components. We should allow limited number of updates within
2434 SCC, too. */
2435 else if (!ipa_edge_within_scc (cs))
2437 tree op = ipa_get_jf_pass_through_operand (jfunc);
2438 value_range op_vr (op, op);
2439 value_range op_res,res;
2441 range_fold_binary_expr (&op_res, operation, operand_type,
2442 &src_lats->m_value_range.m_vr, &op_vr);
2443 ipa_vr_operation_and_type_effects (&vr,
2444 &op_res,
2445 NOP_EXPR, param_type,
2446 operand_type);
2448 if (!vr.undefined_p () && !vr.varying_p ())
/* Intersect with any range recorded on the jump function itself, as both
   constraints must hold simultaneously.  */
2450 if (jfunc->m_vr)
2452 value_range jvr;
2453 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2454 NOP_EXPR,
2455 param_type,
2456 jfunc->m_vr->type ()))
2457 vr.intersect (jvr);
2459 return dest_lat->meet_with (&vr);
2462 else if (jfunc->type == IPA_JF_CONST)
2464 tree val = ipa_get_jf_constant (jfunc);
2465 if (TREE_CODE (val) == INTEGER_CST)
2467 val = fold_convert (param_type, val);
2468 if (TREE_OVERFLOW_P (val))
2469 val = drop_tree_overflow (val);
/* A known integer constant yields the singleton range [val, val].  */
2471 value_range tmpvr (val, val);
2472 return dest_lat->meet_with (&tmpvr);
/* Fallback: use the range stored on the jump function, if any.  */
2476 value_range vr;
2477 if (jfunc->m_vr
2478 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2479 param_type,
2480 jfunc->m_vr->type ()))
2481 return dest_lat->meet_with (&vr);
2482 else
2483 return dest_lat->set_to_bottom ();
2486 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2487 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2488 other cases, return false). If there are no aggregate items, set
2489 aggs_by_ref to NEW_AGGS_BY_REF. */
2491 static bool
2492 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2493 bool new_aggs_by_ref)
2495 if (dest_plats->aggs)
2497 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2499 set_agg_lats_to_bottom (dest_plats);
2500 return true;
2503 else
2504 dest_plats->aggs_by_ref = new_aggs_by_ref;
2505 return false;
2508 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2509 already existing lattice for the given OFFSET and SIZE, marking all skipped
2510 lattices as containing variable and checking for overlaps. If there is no
2511 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2512 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2513 unless there are too many already. If there are two many, return false. If
2514 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2515 skipped lattices were newly marked as containing variable, set *CHANGE to
2516 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2518 static bool
2519 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2520 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2521 struct ipcp_agg_lattice ***aglat,
2522 bool pre_existing, bool *change, int max_agg_items)
2524 gcc_checking_assert (offset >= 0);
/* Skip lattices that lie entirely before OFFSET, marking them variable; an
   overlap straddling OFFSET invalidates the whole parameter.  */
2526 while (**aglat && (**aglat)->offset < offset)
2528 if ((**aglat)->offset + (**aglat)->size > offset)
2530 set_agg_lats_to_bottom (dest_plats);
2531 return false;
2533 *change |= (**aglat)->set_contains_variable ();
2534 *aglat = &(**aglat)->next;
/* Exact-offset hit: usable only if the size also matches.  */
2537 if (**aglat && (**aglat)->offset == offset)
2539 if ((**aglat)->size != val_size)
2541 set_agg_lats_to_bottom (dest_plats);
2542 return false;
2544 gcc_assert (!(**aglat)->next
2545 || (**aglat)->next->offset >= offset + val_size);
2546 return true;
2548 else
/* No existing lattice at OFFSET; create one unless the next lattice would
   overlap or the per-parameter item limit is reached.  */
2550 struct ipcp_agg_lattice *new_al;
2552 if (**aglat && (**aglat)->offset < offset + val_size)
2554 set_agg_lats_to_bottom (dest_plats);
2555 return false;
2557 if (dest_plats->aggs_count == max_agg_items)
2558 return false;
2559 dest_plats->aggs_count++;
2560 new_al = ipcp_agg_lattice_pool.allocate ();
2561 memset (new_al, 0, sizeof (*new_al));
2563 new_al->offset = offset;
2564 new_al->size = val_size;
/* A lattice created after the first merge starts as variable.  */
2565 new_al->contains_variable = pre_existing;
2567 new_al->next = **aglat;
2568 **aglat = new_al;
2569 return true;
2573 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2574 containing an unknown value. */
2576 static bool
2577 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2579 bool ret = false;
2580 while (aglat)
2582 ret |= aglat->set_contains_variable ();
2583 aglat = aglat->next;
2585 return ret;
2588 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2589 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2590 parameter used for lattice value sources. Return true if DEST_PLATS changed
2591 in any way. */
2593 static bool
2594 merge_aggregate_lattices (struct cgraph_edge *cs,
2595 class ipcp_param_lattices *dest_plats,
2596 class ipcp_param_lattices *src_plats,
2597 int src_idx, HOST_WIDE_INT offset_delta)
2599 bool pre_existing = dest_plats->aggs != NULL;
2600 struct ipcp_agg_lattice **dst_aglat;
2601 bool ret = false;
/* The by-ref mode must agree; a mismatch has already bottomed everything.  */
2603 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2604 return true;
2605 if (src_plats->aggs_bottom)
2606 return set_agg_lats_contain_variable (dest_plats);
2607 if (src_plats->aggs_contain_variable)
2608 ret |= set_agg_lats_contain_variable (dest_plats);
2609 dst_aglat = &dest_plats->aggs;
2611 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2612 param_ipa_max_agg_items);
2613 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2614 src_aglat;
2615 src_aglat = src_aglat->next)
2617 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
/* Pieces shifted to before the start of the destination are dropped.  */
2619 if (new_offset < 0)
2620 continue;
2621 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2622 &dst_aglat, pre_existing, &ret, max_agg_items))
2624 struct ipcp_agg_lattice *new_al = *dst_aglat;
2626 dst_aglat = &(*dst_aglat)->next;
2627 if (src_aglat->bottom)
2629 ret |= new_al->set_contains_variable ();
2630 continue;
2632 if (src_aglat->contains_variable)
2633 ret |= new_al->set_contains_variable ();
/* Copy every known value, recording the source lattice value and its
   offset so the value dependency graph stays intact.  */
2634 for (ipcp_value<tree> *val = src_aglat->values;
2635 val;
2636 val = val->next)
2637 ret |= new_al->add_value (val->value, cs, val, src_idx,
2638 src_aglat->offset);
2640 else if (dest_plats->aggs_bottom)
2641 return true;
/* Destination lattices with no counterpart in the source become variable.  */
2643 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2644 return ret;
2647 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2648 pass-through JFUNC and if so, whether it has conform and conforms to the
2649 rules about propagating values passed by reference. */
2651 static bool
2652 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2653 struct ipa_jump_func *jfunc)
2655 return src_plats->aggs
2656 && (!src_plats->aggs_by_ref
2657 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2660 /* Propagate values through ITEM, jump function for a part of an aggregate,
2661 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2662 associated with the jump function. Return true if AGLAT changed in any
2663 way. */
2665 static bool
2666 propagate_aggregate_lattice (struct cgraph_edge *cs,
2667 struct ipa_agg_jf_item *item,
2668 struct ipcp_agg_lattice *aglat)
2670 class ipa_node_params *caller_info;
2671 class ipcp_param_lattices *src_plats;
2672 struct ipcp_lattice<tree> *src_lat;
2673 HOST_WIDE_INT src_offset;
2674 int src_idx;
2675 tree load_type;
2676 bool ret;
/* A constant item needs no source lattice, add its value directly.  */
2678 if (item->jftype == IPA_JF_CONST)
2680 tree value = item->value.constant;
2682 gcc_checking_assert (is_gimple_ip_invariant (value));
2683 return aglat->add_value (value, cs, NULL, 0);
2686 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2687 || item->jftype == IPA_JF_LOAD_AGG);
2689 caller_info = ipa_node_params_sum->get (cs->caller);
2690 src_idx = item->value.pass_through.formal_id;
2691 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
/* Select the source lattice: the scalar lattice of the formal parameter for
   a pass-through, or the matching aggregate slot for an aggregate load.  */
2693 if (item->jftype == IPA_JF_PASS_THROUGH)
2695 load_type = NULL_TREE;
2696 src_lat = &src_plats->itself;
2697 src_offset = -1;
2699 else
2701 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2702 struct ipcp_agg_lattice *src_aglat;
2704 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2705 if (src_aglat->offset >= load_offset)
2706 break;
2708 load_type = item->value.load_agg.type;
/* The source slot must match exactly in offset, size and by-ref mode,
   otherwise the load cannot be interpreted.  */
2709 if (!src_aglat
2710 || src_aglat->offset > load_offset
2711 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2712 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2713 return aglat->set_contains_variable ();
2715 src_lat = src_aglat;
2716 src_offset = load_offset;
2719 if (src_lat->bottom
2720 || (!ipcp_versionable_function_p (cs->caller)
2721 && !src_lat->is_single_const ()))
2722 return aglat->set_contains_variable ();
2724 ret = propagate_vals_across_arith_jfunc (cs,
2725 item->value.pass_through.operation,
2726 load_type,
2727 item->value.pass_through.operand,
2728 src_lat, aglat,
2729 src_offset,
2730 src_idx,
2731 item->type);
2733 if (src_lat->contains_variable)
2734 ret |= aglat->set_contains_variable ();
2736 return ret;
2739 /* Propagate scalar values across jump function JFUNC that is associated with
2740 edge CS and put the values into DEST_LAT. */
2742 static bool
2743 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2744 struct ipa_jump_func *jfunc,
2745 class ipcp_param_lattices *dest_plats)
2747 bool ret = false;
2749 if (dest_plats->aggs_bottom)
2750 return false;
/* A simple pass-through can merge the caller's aggregate lattices whole.  */
2752 if (jfunc->type == IPA_JF_PASS_THROUGH
2753 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2755 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2756 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2757 class ipcp_param_lattices *src_plats;
2759 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2760 if (agg_pass_through_permissible_p (src_plats, jfunc))
2762 /* Currently we do not produce clobber aggregate jump
2763 functions, replace with merging when we do. */
2764 gcc_assert (!jfunc->agg.items);
2765 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2766 src_idx, 0);
2767 return ret;
/* An ancestor that preserves the aggregate merges with its offset.  */
2770 else if (jfunc->type == IPA_JF_ANCESTOR
2771 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2773 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2774 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2775 class ipcp_param_lattices *src_plats;
2777 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2778 if (src_plats->aggs && src_plats->aggs_by_ref)
2780 /* Currently we do not produce clobber aggregate jump
2781 functions, replace with merging when we do. */
2782 gcc_assert (!jfunc->agg.items);
2783 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2784 ipa_get_jf_ancestor_offset (jfunc));
2786 else if (!src_plats->aggs_by_ref)
2787 ret |= set_agg_lats_to_bottom (dest_plats);
2788 else
2789 ret |= set_agg_lats_contain_variable (dest_plats);
2790 return ret;
/* Otherwise propagate the individual aggregate items recorded on the jump
   function, one destination lattice per item.  */
2793 if (jfunc->agg.items)
2795 bool pre_existing = dest_plats->aggs != NULL;
2796 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2797 struct ipa_agg_jf_item *item;
2798 int i;
2800 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2801 return true;
2803 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2804 param_ipa_max_agg_items);
2805 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2807 HOST_WIDE_INT val_size;
2809 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2810 continue;
2811 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2813 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2814 &aglat, pre_existing, &ret, max_agg_items))
2816 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2817 aglat = &(*aglat)->next;
2819 else if (dest_plats->aggs_bottom)
2820 return true;
/* Remaining destination lattices got no information from this edge.  */
2823 ret |= set_chain_of_aglats_contains_variable (*aglat);
2825 else
2826 ret |= set_agg_lats_contain_variable (dest_plats);
2828 return ret;
2831 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2832 non-thunk) destination, the call passes through a thunk. */
2834 static bool
2835 call_passes_through_thunk (cgraph_edge *cs)
2837 cgraph_node *alias_or_thunk = cs->callee;
2838 while (alias_or_thunk->alias)
2839 alias_or_thunk = alias_or_thunk->get_alias_target ();
2840 return alias_or_thunk->thunk;
2843 /* Propagate constants from the caller to the callee of CS. INFO describes the
2844 caller. */
2846 static bool
2847 propagate_constants_across_call (struct cgraph_edge *cs)
2849 class ipa_node_params *callee_info;
2850 enum availability availability;
2851 cgraph_node *callee;
2852 class ipa_edge_args *args;
2853 bool ret = false;
2854 int i, args_count, parms_count;
2856 callee = cs->callee->function_symbol (&availability);
2857 if (!callee->definition)
2858 return false;
2859 gcc_checking_assert (callee->has_gimple_body_p ());
2860 callee_info = ipa_node_params_sum->get (callee);
2861 if (!callee_info)
2862 return false;
2864 args = ipa_edge_args_sum->get (cs);
2865 parms_count = ipa_get_param_count (callee_info);
2866 if (parms_count == 0)
2867 return false;
/* If the caller was not analyzed (no jump functions) or is compiled without
   IPA-CP, every callee parameter must be considered variable.  */
2868 if (!args
2869 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2870 || !opt_for_fn (cs->caller->decl, optimize))
2872 for (i = 0; i < parms_count; i++)
2873 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2874 i));
2875 return ret;
2877 args_count = ipa_get_cs_argument_count (args);
2879 /* If this call goes through a thunk we must not propagate to the first (0th)
2880 parameter. However, we might need to uncover a thunk from below a series
2881 of aliases first. */
2882 if (call_passes_through_thunk (cs))
2884 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2885 0));
2886 i = 1;
2888 else
2889 i = 0;
/* Propagate each actual argument into the matching formal parameter through
   all tracked lattices (scalar, context, bits, aggregates, value range).  */
2891 for (; (i < args_count) && (i < parms_count); i++)
2893 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2894 class ipcp_param_lattices *dest_plats;
2895 tree param_type = ipa_get_type (callee_info, i);
2897 dest_plats = ipa_get_parm_lattices (callee_info, i);
/* An interposable callee may be replaced at link time, so nothing about its
   parameters can be assumed.  */
2898 if (availability == AVAIL_INTERPOSABLE)
2899 ret |= set_all_contains_variable (dest_plats);
2900 else
2902 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2903 &dest_plats->itself,
2904 param_type);
2905 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2906 &dest_plats->ctxlat);
2908 |= propagate_bits_across_jump_function (cs, i, jump_func,
2909 &dest_plats->bits_lattice);
2910 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2911 dest_plats);
2912 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2913 ret |= propagate_vr_across_jump_function (cs, jump_func,
2914 dest_plats, param_type);
2915 else
2916 ret |= dest_plats->m_value_range.set_to_bottom ();
/* Parameters with no corresponding argument are unknown.  */
2919 for (; i < parms_count; i++)
2920 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2922 return ret;
2925 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2926 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter
2927 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2929 static tree
2930 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2931 const vec<tree> &known_csts,
2932 const vec<ipa_polymorphic_call_context> &known_contexts,
2933 const vec<ipa_agg_value_set> &known_aggs,
2934 struct ipa_agg_replacement_value *agg_reps,
2935 bool *speculative)
2937 int param_index = ie->indirect_info->param_index;
2938 HOST_WIDE_INT anc_offset;
2939 tree t = NULL;
2940 tree target = NULL;
2942 *speculative = false;
/* The call target is not derived from any parameter; nothing to do.  */
2944 if (param_index == -1)
2945 return NULL_TREE;
/* Non-polymorphic case: the target is either the parameter itself or a
   value loaded from an aggregate passed in the parameter.  */
2947 if (!ie->indirect_info->polymorphic)
2949 tree t = NULL;
2951 if (ie->indirect_info->agg_contents)
2953 t = NULL;
/* Prefer aggregate replacement values (from a materialized clone) over
   the generic known-aggregate vectors.  */
2954 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
2956 while (agg_reps)
2958 if (agg_reps->index == param_index
2959 && agg_reps->offset == ie->indirect_info->offset
2960 && agg_reps->by_ref == ie->indirect_info->by_ref)
2962 t = agg_reps->value;
2963 break;
2965 agg_reps = agg_reps->next;
2968 if (!t)
2970 const ipa_agg_value_set *agg;
2971 if (known_aggs.length () > (unsigned int) param_index)
2972 agg = &known_aggs[param_index];
2973 else
2974 agg = NULL;
2975 bool from_global_constant;
2976 t = ipa_find_agg_cst_for_param (agg,
2977 (unsigned) param_index
2978 < known_csts.length ()
2979 ? known_csts[param_index]
2980 : NULL,
2981 ie->indirect_info->offset,
2982 ie->indirect_info->by_ref,
2983 &from_global_constant);
/* A value that might have been modified before the call can only be
   trusted if it came from a global constant.  */
2984 if (t
2985 && !from_global_constant
2986 && !ie->indirect_info->guaranteed_unmodified)
2987 t = NULL_TREE;
2990 else if ((unsigned) param_index < known_csts.length ())
2991 t = known_csts[param_index];
/* Only a direct address of a function is a usable target.  */
2993 if (t
2994 && TREE_CODE (t) == ADDR_EXPR
2995 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
2996 return TREE_OPERAND (t, 0);
2997 else
2998 return NULL_TREE;
/* Polymorphic case below: attempt devirtualization.  */
3001 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3002 return NULL_TREE;
3004 gcc_assert (!ie->indirect_info->agg_contents);
3005 anc_offset = ie->indirect_info->offset;
3007 t = NULL;
3009 /* Try to work out value of virtual table pointer value in replacements. */
3010 if (!t && agg_reps && !ie->indirect_info->by_ref)
3012 while (agg_reps)
3014 if (agg_reps->index == param_index
3015 && agg_reps->offset == ie->indirect_info->offset
3016 && agg_reps->by_ref)
3018 t = agg_reps->value;
3019 break;
3021 agg_reps = agg_reps->next;
3025 /* Try to work out value of virtual table pointer value in known
3026 aggregate values. */
3027 if (!t && known_aggs.length () > (unsigned int) param_index
3028 && !ie->indirect_info->by_ref)
3030 const ipa_agg_value_set *agg = &known_aggs[param_index];
3031 t = ipa_find_agg_cst_for_param (agg,
3032 (unsigned) param_index
3033 < known_csts.length ()
3034 ? known_csts[param_index] : NULL,
3035 ie->indirect_info->offset, true);
3038 /* If we found the virtual table pointer, lookup the target. */
3039 if (t)
3041 tree vtable;
3042 unsigned HOST_WIDE_INT offset;
3043 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3045 bool can_refer;
3046 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3047 vtable, offset, &can_refer);
3048 if (can_refer)
3050 if (!target
3051 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3052 || !possible_polymorphic_call_target_p
3053 (ie, cgraph_node::get (target)))
3055 /* Do not speculate builtin_unreachable, it is stupid! */
3056 if (ie->indirect_info->vptr_changed)
3057 return NULL;
3058 target = ipa_impossible_devirt_target (ie, target);
/* If the vptr may have changed since the recorded value, the result is
   only a speculation; otherwise it is definite.  */
3060 *speculative = ie->indirect_info->vptr_changed;
3061 if (!*speculative)
3062 return target;
3067 /* Do we know the constant value of pointer? */
3068 if (!t && (unsigned) param_index < known_csts.length ())
3069 t = known_csts[param_index];
3071 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
/* Build the polymorphic call context from the known context and/or the known
   constant pointer value, then query possible targets.  */
3073 ipa_polymorphic_call_context context;
3074 if (known_contexts.length () > (unsigned int) param_index)
3076 context = known_contexts[param_index];
3077 context.offset_by (anc_offset);
3078 if (ie->indirect_info->vptr_changed)
3079 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3080 ie->indirect_info->otr_type);
3081 if (t)
3083 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3084 (t, ie->indirect_info->otr_type, anc_offset);
3085 if (!ctx2.useless_p ())
3086 context.combine_with (ctx2, ie->indirect_info->otr_type);
3089 else if (t)
3091 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3092 anc_offset);
3093 if (ie->indirect_info->vptr_changed)
3094 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3095 ie->indirect_info->otr_type);
3097 else
3098 return NULL_TREE;
3100 vec <cgraph_node *>targets;
3101 bool final;
3103 targets = possible_polymorphic_call_targets
3104 (ie->indirect_info->otr_type,
3105 ie->indirect_info->otr_token,
3106 context, &final);
/* With multiple or non-final candidates, fall back to speculative
   devirtualization if the edge is hot and it is enabled.  */
3107 if (!final || targets.length () > 1)
3109 struct cgraph_node *node;
3110 if (*speculative)
3111 return target;
3112 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3113 || ie->speculative || !ie->maybe_hot_p ())
3114 return NULL;
3115 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3116 ie->indirect_info->otr_token,
3117 context);
3118 if (node)
3120 *speculative = true;
3121 target = node->decl;
3123 else
3124 return NULL;
3126 else
3128 *speculative = false;
3129 if (targets.length () == 1)
3130 target = targets[0]->decl;
3131 else
3132 target = ipa_impossible_devirt_target (ie, NULL_TREE);
/* Sanity-check the chosen target against the call's possible targets.  */
3135 if (target && !possible_polymorphic_call_target_p (ie,
3136 cgraph_node::get (target)))
3138 if (*speculative)
3139 return NULL;
3140 target = ipa_impossible_devirt_target (ie, target);
3143 return target;
3146 /* If an indirect edge IE can be turned into a direct one based on data in
3147 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3148 whether the discovered target is only speculative guess. */
3150 tree
3151 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3152 ipa_call_arg_values *avals,
3153 bool *speculative)
3155 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3156 avals->m_known_contexts,
3157 avals->m_known_aggs,
3158 NULL, speculative);
3161 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3163 tree
3164 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3165 ipa_auto_call_arg_values *avals,
3166 bool *speculative)
3168 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3169 avals->m_known_contexts,
3170 avals->m_known_aggs,
3171 NULL, speculative);
3174 /* Calculate devirtualization time bonus for NODE, assuming we know information
3175 about arguments stored in AVALS. */
3177 static int
3178 devirtualization_time_bonus (struct cgraph_node *node,
3179 ipa_auto_call_arg_values *avals)
3181 struct cgraph_edge *ie;
3182 int res = 0;
/* Score every indirect call that the known argument values would allow us to
   turn into a direct (and possibly inlined) one.  */
3184 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3186 struct cgraph_node *callee;
3187 class ipa_fn_summary *isummary;
3188 enum availability avail;
3189 tree target;
3190 bool speculative;
3192 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3193 if (!target)
3194 continue;
3196 /* Only bare minimum benefit for clearly un-inlineable targets. */
3197 res += 1;
3198 callee = cgraph_node::get (target);
3199 if (!callee || !callee->definition)
3200 continue;
3201 callee = callee->function_symbol (&avail);
3202 if (avail < AVAIL_AVAILABLE)
3203 continue;
3204 isummary = ipa_fn_summaries->get (callee);
3205 if (!isummary || !isummary->inlinable)
3206 continue;
3208 int size = ipa_size_summaries->get (callee)->size;
3209 /* FIXME: The values below need re-considering and perhaps also
3210 integrating into the cost metrics, at lest in some very basic way. */
3211 int max_inline_insns_auto
3212 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
/* Smaller callees are more likely to be inlined after devirtualization, so
   they get a bigger bonus; speculative targets count for half.  */
3213 if (size <= max_inline_insns_auto / 4)
3214 res += 31 / ((int)speculative + 1);
3215 else if (size <= max_inline_insns_auto / 2)
3216 res += 15 / ((int)speculative + 1);
3217 else if (size <= max_inline_insns_auto
3218 || DECL_DECLARED_INLINE_P (callee->decl))
3219 res += 7 / ((int)speculative + 1);
3222 return res;
3225 /* Return time bonus incurred because of hints stored in ESTIMATES.  A flat
     bonus is given when any loop hint is present, plus a per-loop bonus for
     each loop with known iteration count or stride. */
3227 static int
3228 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3230 int result = 0;
3231 ipa_hints hints = estimates.hints;
3232 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3233 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3235 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3237 if (hints & INLINE_HINT_loop_iterations)
3238 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3240 if (hints & INLINE_HINT_loop_stride)
3241 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3243 return result;
3246 /* If there is a reason to penalize the function described by INFO in the
3247 cloning goodness evaluation, do so.  Returns EVALUATION scaled down by the
     recursion and/or single-call penalty percentages (parameters of NODE). */
3249 static inline sreal
3250 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3251 sreal evaluation)
/* Penalize nodes in a non-trivial SCC (recursion through other nodes). */
3253 if (info->node_within_scc && !info->node_is_self_scc)
3254 evaluation = (evaluation
3255 * (100 - opt_for_fn (node->decl,
3256 param_ipa_cp_recursion_penalty))) / 100;
/* Penalize nodes whose only call site is a single call. */
3258 if (info->node_calling_single_call)
3259 evaluation = (evaluation
3260 * (100 - opt_for_fn (node->decl,
3261 param_ipa_cp_single_call_penalty)))
3262 / 100;
3264 return evaluation;
3267 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3268 and SIZE_COST and with the sum of frequencies of incoming edges to the
3269 potential new clone in FREQ_SUM and their profile counts in COUNT_SUM. */
3271 static bool
3272 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3273 sreal freq_sum, profile_count count_sum,
3274 int size_cost)
3276 if (time_benefit == 0
3277 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3278 || node->optimize_for_size_p ())
3279 return false;
3281 gcc_assert (size_cost > 0);
3283 ipa_node_params *info = ipa_node_params_sum->get (node);
3284 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
/* With a real profile, weigh the benefit by how big a fraction of the
   hottest count the would-be clone receives; otherwise fall back to edge
   frequencies. */
3285 if (max_count > profile_count::zero ())
3288 sreal factor = count_sum.probability_in (max_count).to_sreal ();
3289 sreal evaluation = (time_benefit * factor) / size_cost;
3290 evaluation = incorporate_penalties (node, info, evaluation);
3291 evaluation *= 1000;
3293 if (dump_file && (dump_flags & TDF_DETAILS))
3295 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3296 "size: %i, count_sum: ", time_benefit.to_double (),
3297 size_cost);
3298 count_sum.dump (dump_file);
3299 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3300 info->node_within_scc
3301 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3302 info->node_calling_single_call ? ", single_call" : "",
3303 evaluation.to_double (), eval_threshold);
3306 return evaluation.to_int () >= eval_threshold;
3308 else
3310 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3311 evaluation = incorporate_penalties (node, info, evaluation);
3312 evaluation *= 1000;
3314 if (dump_file && (dump_flags & TDF_DETAILS))
3315 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3316 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3317 "threshold: %i\n",
3318 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3319 info->node_within_scc
3320 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3321 info->node_calling_single_call ? ", single_call" : "",
3322 evaluation.to_double (), eval_threshold);
3324 return evaluation.to_int () >= eval_threshold;
3328 /* Return all context independent values from aggregate lattices in PLATS in a
3329 vector.  Return an empty vector (vNULL) if there are none. */
3331 static vec<ipa_agg_value>
3332 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3334 vec<ipa_agg_value> res = vNULL;
3336 if (plats->aggs_bottom
3337 || plats->aggs_contain_variable
3338 || plats->aggs_count == 0)
3339 return vNULL;
/* Collect the (offset, value) pair from each lattice that has settled on
   exactly one constant. */
3341 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3342 aglat;
3343 aglat = aglat->next)
3344 if (aglat->is_single_const ())
3346 struct ipa_agg_value item;
3347 item.offset = aglat->offset;
3348 item.value = aglat->values->value;
3349 res.safe_push (item);
3351 return res;
3354 /* Grow vectors in AVALS and fill them with information about values of
3355 parameters that are known to be independent of the context.  Only calculate
3356 m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
3357 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3358 parameters will be stored in it.  Returns true if any context-independent
     scalar value or aggregate item was found.
3360 TODO: Also grow context independent value range vectors. */
3362 static bool
3363 gather_context_independent_values (class ipa_node_params *info,
3364 ipa_auto_call_arg_values *avals,
3365 bool calculate_aggs,
3366 int *removable_params_cost)
3368 int i, count = ipa_get_param_count (info);
3369 bool ret = false;
3371 avals->m_known_vals.safe_grow_cleared (count, true);
3372 avals->m_known_contexts.safe_grow_cleared (count, true);
3373 if (calculate_aggs)
3374 avals->m_known_aggs.safe_grow_cleared (count, true);
3376 if (removable_params_cost)
3377 *removable_params_cost = 0;
3379 for (i = 0; i < count; i++)
3381 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3382 ipcp_lattice<tree> *lat = &plats->itself;
3384 if (lat->is_single_const ())
3386 ipcp_value<tree> *val = lat->values;
3387 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3388 avals->m_known_vals[i] = val->value;
3389 if (removable_params_cost)
3390 *removable_params_cost
3391 += estimate_move_cost (TREE_TYPE (val->value), false);
3392 ret = true;
/* A parameter that is not used at all is also removable, at the cost of
   its own move. */
3394 else if (removable_params_cost
3395 && !ipa_is_param_used (info, i))
3396 *removable_params_cost
3397 += ipa_get_param_move_cost (info, i);
3399 if (!ipa_is_param_used (info, i))
3400 continue;
3402 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3403 /* Do not account known context as reason for cloning.  We can see
3404 if it permits devirtualization. */
3405 if (ctxlat->is_single_const ())
3406 avals->m_known_contexts[i] = ctxlat->values->value;
3408 if (calculate_aggs)
3410 vec<ipa_agg_value> agg_items;
3411 struct ipa_agg_value_set *agg;
3413 agg_items = context_independent_aggregate_values (plats);
3414 agg = &avals->m_known_aggs[i];
3415 agg->items = agg_items;
3416 agg->by_ref = plats->aggs_by_ref;
3417 ret |= !agg_items.is_empty ();
3421 return ret;
3424 /* Perform time and size measurement of NODE with the context given in AVALS,
3425 calculate the benefit compared to the node without specialization and store
3426 it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
3427 context-independent or unused removable parameters and EST_MOVE_COST, the
3428 estimated movement of the considered parameter. */
3430 static void
3431 perform_estimation_of_a_value (cgraph_node *node,
3432 ipa_auto_call_arg_values *avals,
3433 int removable_params_cost, int est_move_cost,
3434 ipcp_value_base *val)
3436 sreal time_benefit;
3437 ipa_call_estimates estimates;
3439 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3441 /* Extern inline functions have no cloning local time benefits because they
3442 will be inlined anyway.  The only reason to clone them is if it enables
3443 optimization in any of the functions they call. */
3444 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3445 time_benefit = 0;
3446 else
3447 time_benefit = (estimates.nonspecialized_time - estimates.time)
3448 + (devirtualization_time_bonus (node, avals)
3449 + hint_time_bonus (node, estimates)
3450 + removable_params_cost + est_move_cost);
3452 int size = estimates.size;
3453 gcc_checking_assert (size >=0);
3454 /* The inliner-heuristics based estimates may think that in certain
3455 contexts some functions do not have any size at all but we want
3456 all specializations to have at least a tiny cost, not least not to
3457 divide by zero. */
3458 if (size == 0)
3459 size = 1;
3461 val->local_time_benefit = time_benefit;
3462 val->local_size_cost = size;
3465 /* Get the overall limit of growth based on parameters extracted from growth.
3466 It does not really make sense to mix functions with different overall growth
3467 limits but it is possible and if it happens, we do not want to select one
3468 limit at random. */
3470 static long
3471 get_max_overall_size (cgraph_node *node)
3473 long max_new_size = orig_overall_size;
3474 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
/* Small units are allowed to grow up to the "large unit" size. */
3475 if (max_new_size < large_unit)
3476 max_new_size = large_unit;
3477 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3478 max_new_size += max_new_size * unit_growth / 100 + 1;
3479 return max_new_size;
3482 /* Iterate over known values of parameters of NODE and estimate the local
3483 effects in terms of time and size they have.  First the effects of all
     context-independent values taken together are evaluated (possibly marking
     the node for cloning for all contexts), then each scalar, polymorphic
     context and aggregate constant is estimated individually. */
3485 static void
3486 estimate_local_effects (struct cgraph_node *node)
3488 ipa_node_params *info = ipa_node_params_sum->get (node);
3489 int i, count = ipa_get_param_count (info);
3490 bool always_const;
3491 int removable_params_cost;
3493 if (!count || !ipcp_versionable_function_p (node))
3494 return;
3496 if (dump_file && (dump_flags & TDF_DETAILS))
3497 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3499 ipa_auto_call_arg_values avals;
3500 always_const = gather_context_independent_values (info, &avals, true,
3501 &removable_params_cost);
3502 int devirt_bonus = devirtualization_time_bonus (node, &avals);
/* Consider a clone specialized for all contexts when there is anything to
   gain from one: constants, devirtualization or removable parameters. */
3503 if (always_const || devirt_bonus
3504 || (removable_params_cost && node->can_change_signature))
3506 struct caller_statistics stats;
3507 ipa_call_estimates estimates;
3509 init_caller_stats (&stats);
3510 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3511 false);
3512 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3513 sreal time = estimates.nonspecialized_time - estimates.time;
3514 time += devirt_bonus;
3515 time += hint_time_bonus (node, estimates);
3516 time += removable_params_cost;
3517 int size = estimates.size - stats.n_calls * removable_params_cost;
3519 if (dump_file)
3520 fprintf (dump_file, " - context independent values, size: %i, "
3521 "time_benefit: %f\n", size, (time).to_double ());
3523 if (size <= 0 || node->local)
3525 info->do_clone_for_all_contexts = true;
3527 if (dump_file)
3528 fprintf (dump_file, "     Decided to specialize for all "
3529 "known contexts, code not going to grow.\n");
3531 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3532 stats.count_sum, size))
3534 if (size + overall_size <= get_max_overall_size (node))
3536 info->do_clone_for_all_contexts = true;
3537 overall_size += size;
3539 if (dump_file)
3540 fprintf (dump_file, "     Decided to specialize for all "
3541 "known contexts, growth (to %li) deemed "
3542 "beneficial.\n", overall_size);
3544 else if (dump_file && (dump_flags & TDF_DETAILS))
3545 fprintf (dump_file, "  Not cloning for all contexts because "
3546 "maximum unit size would be reached with %li.\n",
3547 size + overall_size);
3549 else if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file, "   Not cloning for all contexts because "
3551 "!good_cloning_opportunity_p.\n");
/* Estimate each candidate scalar constant individually by temporarily
   plugging it into the known-values vector. */
3555 for (i = 0; i < count; i++)
3557 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3558 ipcp_lattice<tree> *lat = &plats->itself;
3559 ipcp_value<tree> *val;
3561 if (lat->bottom
3562 || !lat->values
3563 || avals.m_known_vals[i])
3564 continue;
3566 for (val = lat->values; val; val = val->next)
3568 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3569 avals.m_known_vals[i] = val->value;
3571 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3572 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3573 emc, val);
3575 if (dump_file && (dump_flags & TDF_DETAILS))
3577 fprintf (dump_file, " - estimates for value ");
3578 print_ipcp_constant_value (dump_file, val->value);
3579 fprintf (dump_file, " for ");
3580 ipa_dump_param (dump_file, info, i);
3581 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3582 val->local_time_benefit.to_double (),
3583 val->local_size_cost);
/* Restore the slot so the next candidate starts from a clean state. */
3586 avals.m_known_vals[i] = NULL_TREE;
/* Likewise for each candidate polymorphic call context. */
3589 for (i = 0; i < count; i++)
3591 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3593 if (!plats->virt_call)
3594 continue;
3596 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3597 ipcp_value<ipa_polymorphic_call_context> *val;
3599 if (ctxlat->bottom
3600 || !ctxlat->values
3601 || !avals.m_known_contexts[i].useless_p ())
3602 continue;
3604 for (val = ctxlat->values; val; val = val->next)
3606 avals.m_known_contexts[i] = val->value;
3607 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3608 0, val);
3610 if (dump_file && (dump_flags & TDF_DETAILS))
3612 fprintf (dump_file, " - estimates for polymorphic context ");
3613 print_ipcp_constant_value (dump_file, val->value);
3614 fprintf (dump_file, " for ");
3615 ipa_dump_param (dump_file, info, i);
3616 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3617 val->local_time_benefit.to_double (),
3618 val->local_size_cost);
3621 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
/* And for each candidate aggregate constant, pushed temporarily into the
   known aggregate values and popped again afterwards. */
3624 for (i = 0; i < count; i++)
3626 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3628 if (plats->aggs_bottom || !plats->aggs)
3629 continue;
3631 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3632 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3634 ipcp_value<tree> *val;
3635 if (aglat->bottom || !aglat->values
3636 /* If the following is true, the one value is in known_aggs. */
3637 || (!plats->aggs_contain_variable
3638 && aglat->is_single_const ()))
3639 continue;
3641 for (val = aglat->values; val; val = val->next)
3643 struct ipa_agg_value item;
3645 item.offset = aglat->offset;
3646 item.value = val->value;
3647 agg->items.safe_push (item);
3649 perform_estimation_of_a_value (node, &avals,
3650 removable_params_cost, 0, val);
3652 if (dump_file && (dump_flags & TDF_DETAILS))
3654 fprintf (dump_file, " - estimates for value ");
3655 print_ipcp_constant_value (dump_file, val->value);
3656 fprintf (dump_file, " for ");
3657 ipa_dump_param (dump_file, info, i);
3658 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3659 "]: time_benefit: %g, size: %i\n",
3660 plats->aggs_by_ref ? "ref " : "",
3661 aglat->offset,
3662 val->local_time_benefit.to_double (),
3663 val->local_size_cost);
3666 agg->items.pop ();
3673 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3674 topological sort of values.  This is the recursive step of Tarjan's SCC
     algorithm: dfs/low_link numbering, an explicit value stack, and popping
     of a complete strongly connected component when a root is found. */
3676 template <typename valtype>
3677 void
3678 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3680 ipcp_value_source<valtype> *src;
/* Non-zero dfs means the value has already been visited. */
3682 if (cur_val->dfs)
3683 return;
3685 dfs_counter++;
3686 cur_val->dfs = dfs_counter;
3687 cur_val->low_link = dfs_counter;
3689 cur_val->topo_next = stack;
3690 stack = cur_val;
3691 cur_val->on_stack = true;
3693 for (src = cur_val->sources; src; src = src->next)
3694 if (src->val)
3696 if (src->val->dfs == 0)
3698 add_val (src->val);
3699 if (src->val->low_link < cur_val->low_link)
3700 cur_val->low_link = src->val->low_link;
3702 else if (src->val->on_stack
3703 && src->val->dfs < cur_val->low_link)
3704 cur_val->low_link = src->val->dfs;
/* CUR_VAL is the root of an SCC: pop its members off the stack and link
   them into an scc_next chain, then prepend to the topological order. */
3707 if (cur_val->dfs == cur_val->low_link)
3709 ipcp_value<valtype> *v, *scc_list = NULL;
3713 v = stack;
3714 stack = v->topo_next;
3715 v->on_stack = false;
3716 v->scc_no = cur_val->dfs;
3718 v->scc_next = scc_list;
3719 scc_list = v;
3721 while (v != cur_val);
3723 cur_val->topo_next = values_topo;
3724 values_topo = cur_val;
3728 /* Add all values in lattices associated with NODE to the topological sort if
3729 they are not there yet.  Scalar and aggregate constants go into
     TOPO->constants, polymorphic contexts into TOPO->contexts. */
3731 static void
3732 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3734 ipa_node_params *info = ipa_node_params_sum->get (node);
3735 int i, count = ipa_get_param_count (info);
3737 for (i = 0; i < count; i++)
3739 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3740 ipcp_lattice<tree> *lat = &plats->itself;
3741 struct ipcp_agg_lattice *aglat;
3743 if (!lat->bottom)
3745 ipcp_value<tree> *val;
3746 for (val = lat->values; val; val = val->next)
3747 topo->constants.add_val (val);
3750 if (!plats->aggs_bottom)
3751 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3752 if (!aglat->bottom)
3754 ipcp_value<tree> *val;
3755 for (val = aglat->values; val; val = val->next)
3756 topo->constants.add_val (val);
3759 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3760 if (!ctxlat->bottom)
3762 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3763 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3764 topo->contexts.add_val (ctxval);
3769 /* One pass of constants propagation along the call graph edges, from callers
3770 to callees (requires topological ordering in TOPO), iterate over strongly
3771 connected components. */
3773 static void
3774 propagate_constants_topo (class ipa_topo_info *topo)
3776 int i;
3778 for (i = topo->nnodes - 1; i >= 0; i--)
3780 unsigned j;
3781 struct cgraph_node *v, *node = topo->order[i];
3782 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3784 /* First, iteratively propagate within the strongly connected component
3785 until all lattices stabilize. */
3786 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3787 if (v->has_gimple_body_p ())
3789 if (opt_for_fn (v->decl, flag_ipa_cp)
3790 && opt_for_fn (v->decl, optimize))
3791 push_node_to_stack (topo, v);
3792 /* When V is not optimized, we can not push it to stack, but
3793 still we need to set all its callees lattices to bottom. */
3794 else
3796 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3797 propagate_constants_across_call (cs);
/* Worklist loop: re-process a callee whenever propagation changed any of
   its lattices, until a fixpoint is reached within the SCC. */
3801 v = pop_node_from_stack (topo);
3802 while (v)
3804 struct cgraph_edge *cs;
3805 class ipa_node_params *info = NULL;
3806 bool self_scc = true;
3808 for (cs = v->callees; cs; cs = cs->next_callee)
3809 if (ipa_edge_within_scc (cs))
3811 cgraph_node *callee = cs->callee->function_symbol ();
3813 if (v != callee)
3814 self_scc = false;
3816 if (!info)
3818 info = ipa_node_params_sum->get (v);
3819 info->node_within_scc = true;
3822 if (propagate_constants_across_call (cs))
3823 push_node_to_stack (topo, callee);
3826 if (info)
3827 info->node_is_self_scc = self_scc;
3829 v = pop_node_from_stack (topo);
3832 /* Afterwards, propagate along edges leading out of the SCC, calculate
3833 the local effects of the discovered constants and add all valid values
3834 to their topological sort. */
3835 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3836 if (v->has_gimple_body_p ()
3837 && opt_for_fn (v->decl, flag_ipa_cp)
3838 && opt_for_fn (v->decl, optimize))
3840 struct cgraph_edge *cs;
3842 estimate_local_effects (v);
3843 add_all_node_vals_to_toposort (v, topo);
3844 for (cs = v->callees; cs; cs = cs->next_callee)
3845 if (!ipa_edge_within_scc (cs))
3846 propagate_constants_across_call (cs);
3848 cycle_nodes.release ();
3852 /* Propagate the estimated effects of individual values along the topological
3853 order, from the dependent values to those they depend on. */
3855 template <typename valtype>
3856 void
3857 value_topo_info<valtype>::propagate_effects ()
3859 ipcp_value<valtype> *base;
3860 hash_set<ipcp_value<valtype> *> processed_srcvals;
3862 for (base = values_topo; base; base = base->topo_next)
3864 ipcp_value_source<valtype> *src;
3865 ipcp_value<valtype> *val;
3866 sreal time = 0;
3867 HOST_WIDE_INT size = 0;
/* Sum local and already-propagated effects over the whole SCC of BASE. */
3869 for (val = base; val; val = val->scc_next)
3871 time = time + val->local_time_benefit + val->prop_time_benefit;
3872 size = size + val->local_size_cost + val->prop_size_cost;
3875 for (val = base; val; val = val->scc_next)
3877 processed_srcvals.empty ();
3878 for (src = val->sources; src; src = src->next)
3879 if (src->val
3880 && src->cs->maybe_hot_p ())
/* Add size only once per source value, guarding against INT_MAX
   overflow of the accumulated cost. */
3882 if (!processed_srcvals.add (src->val))
3884 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3885 if (prop_size < INT_MAX)
3886 src->val->prop_size_cost = prop_size;
3887 else
3888 continue;
/* Values from the same SCC or generated by self-recursion get their
   time benefit scaled up by a parameter-controlled factor. */
3891 int special_factor = 1;
3892 if (val->same_scc (src->val))
3893 special_factor
3894 = opt_for_fn(src->cs->caller->decl,
3895 param_ipa_cp_recursive_freq_factor);
3896 else if (val->self_recursion_generated_p ()
3897 && (src->cs->callee->function_symbol ()
3898 == src->cs->caller))
3900 int max_recur_gen_depth
3901 = opt_for_fn(src->cs->caller->decl,
3902 param_ipa_cp_max_recursive_depth);
3903 special_factor = max_recur_gen_depth
3904 - val->self_recursion_generated_level + 1;
3907 src->val->prop_time_benefit
3908 += time * special_factor * src->cs->sreal_frequency ();
3911 if (size < INT_MAX)
3913 val->prop_time_benefit = time;
3914 val->prop_size_cost = size;
3916 else
3918 val->prop_time_benefit = 0;
3919 val->prop_size_cost = 0;
3926 /* Propagate constants, polymorphic contexts and their effects from the
3927 summaries interprocedurally.  Initializes per-node lattices, the unit size
     accumulator and MAX_COUNT before running the topological propagation. */
3929 static void
3930 ipcp_propagate_stage (class ipa_topo_info *topo)
3932 struct cgraph_node *node;
3934 if (dump_file)
3935 fprintf (dump_file, "\n Propagating constants:\n\n");
3937 max_count = profile_count::uninitialized ();
3939 FOR_EACH_DEFINED_FUNCTION (node)
3941 if (node->has_gimple_body_p ()
3942 && opt_for_fn (node->decl, flag_ipa_cp)
3943 && opt_for_fn (node->decl, optimize))
3945 ipa_node_params *info = ipa_node_params_sum->get (node);
3946 determine_versionability (node, info);
3948 unsigned nlattices = ipa_get_param_count (info);
/* Placement-new so the lattice array lives in the XCNEWVEC chunk. */
3949 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
3950 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
3951 initialize_node_lattices (node);
3953 ipa_size_summary *s = ipa_size_summaries->get (node);
3954 if (node->definition && !node->alias && s != NULL)
3955 overall_size += s->self_size;
3956 max_count = max_count.max (node->count.ipa ());
3959 orig_overall_size = overall_size;
3961 if (dump_file)
3962 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
3964 propagate_constants_topo (topo);
3965 if (flag_checking)
3966 ipcp_verify_propagated_values ();
3967 topo->constants.propagate_effects ();
3968 topo->contexts.propagate_effects ();
3970 if (dump_file)
3972 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
3973 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
3977 /* Discover newly direct outgoing edges from NODE which is a new clone with
3978 known KNOWN_CSTS and make them direct.  KNOWN_CONTEXTS and AGGVALS carry
     the polymorphic contexts and aggregate replacement values of the clone. */
3980 static void
3981 ipcp_discover_new_direct_edges (struct cgraph_node *node,
3982 vec<tree> known_csts,
3983 vec<ipa_polymorphic_call_context>
3984 known_contexts,
3985 struct ipa_agg_replacement_value *aggvals)
3987 struct cgraph_edge *ie, *next_ie;
3988 bool found = false;
3990 for (ie = node->indirect_calls; ie; ie = next_ie)
3992 tree target;
3993 bool speculative;
/* Making an edge direct removes IE from the indirect list, so remember
   the next edge up front. */
3995 next_ie = ie->next_callee;
3996 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
3997 vNULL, aggvals, &speculative);
3998 if (target)
4000 bool agg_contents = ie->indirect_info->agg_contents;
4001 bool polymorphic = ie->indirect_info->polymorphic;
4002 int param_index = ie->indirect_info->param_index;
4003 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4004 speculative);
4005 found = true;
4007 if (cs && !agg_contents && !polymorphic)
4009 ipa_node_params *info = ipa_node_params_sum->get (node);
4010 int c = ipa_get_controlled_uses (info, param_index);
4011 if (c != IPA_UNDESCRIBED_USE
4012 && !ipa_get_param_load_dereferenced (info, param_index))
4014 struct ipa_ref *to_del;
4016 c--;
4017 ipa_set_controlled_uses (info, param_index, c);
4018 if (dump_file && (dump_flags & TDF_DETAILS))
4019 fprintf (dump_file, "     controlled uses count of param "
4020 "%i bumped down to %i\n", param_index, c);
4021 if (c == 0
4022 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4024 if (dump_file && (dump_flags & TDF_DETAILS))
4025 fprintf (dump_file, "       and even removing its "
4026 "cloning-created reference\n");
4027 to_del->remove_reference ();
4033 /* Turning calls to direct calls will improve overall summary. */
4034 if (found)
4035 ipa_update_overall_fn_summary (node);
4038 class edge_clone_summary;
4039 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4041 /* Edge clone summary: links each call graph edge into a doubly-linked list
     of its clones.  The destructor unlinks the edge from the list. */
4043 class edge_clone_summary
4045 public:
4046 /* Default constructor. */
4047 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4049 /* Default destructor; unlinks this edge from the clone list. */
4050 ~edge_clone_summary ()
4052 if (prev_clone)
4053 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4054 if (next_clone)
4055 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4058 cgraph_edge *prev_clone;
4059 cgraph_edge *next_clone;
4062 class edge_clone_summary_t:
4063 public call_summary <edge_clone_summary *>
4065 public:
4066 edge_clone_summary_t (symbol_table *symtab):
4067 call_summary <edge_clone_summary *> (symtab)
/* Create a summary for the destination edge when one is duplicated. */
4069 m_initialize_when_cloning = true;
4072 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4073 edge_clone_summary *src_data,
4074 edge_clone_summary *dst_data);
4077 /* Edge duplication hook.  Inserts DST_EDGE into the clone list right after
     SRC_EDGE, fixing up the neighbouring prev/next pointers. */
4079 void
4080 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4081 edge_clone_summary *src_data,
4082 edge_clone_summary *dst_data)
4084 if (src_data->next_clone)
4085 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4086 dst_data->prev_clone = src_edge;
4087 dst_data->next_clone = src_data->next_clone;
4088 src_data->next_clone = dst_edge;
4091 /* Return true if CS calls DEST or its clone for all contexts.  When
4092 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4093 edges from/to an all-context clone. */
4095 static bool
4096 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4097 bool allow_recursion_to_clone)
4099 enum availability availability;
4100 cgraph_node *callee = cs->callee->function_symbol (&availability);
4102 if (availability <= AVAIL_INTERPOSABLE)
4103 return false;
4104 if (callee == dest)
4105 return true;
4106 if (!allow_recursion_to_clone && cs->caller == callee)
4107 return false;
4109 ipa_node_params *info = ipa_node_params_sum->get (callee);
4110 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4113 /* Return true if edge CS does bring about the value described by SRC to
4114 DEST_VAL of node DEST or its clone for all contexts. */
4116 static bool
4117 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4118 cgraph_node *dest, ipcp_value<tree> *dest_val)
4120 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4122 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4123 || caller_info->node_dead)
4124 return false;
/* A NULL source value stands for "any value" and is always brought. */
4126 if (!src->val)
4127 return true;
/* For a caller that is itself a clone, look the value up in its known
   constants (offset == -1 means a scalar, otherwise an aggregate part). */
4129 if (caller_info->ipcp_orig_node)
4131 tree t;
4132 if (src->offset == -1)
4133 t = caller_info->known_csts[src->index];
4134 else
4135 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4136 return (t != NULL_TREE
4137 && values_equal_for_ipcp_p (src->val->value, t));
4139 else
4141 if (src->val == dest_val)
4142 return true;
/* Otherwise consult the caller's lattices: the value is brought only if
   the relevant lattice has settled on exactly this constant. */
4144 struct ipcp_agg_lattice *aglat;
4145 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4146 src->index);
4147 if (src->offset == -1)
4148 return (plats->itself.is_single_const ()
4149 && values_equal_for_ipcp_p (src->val->value,
4150 plats->itself.values->value));
4151 else
4153 if (plats->aggs_bottom || plats->aggs_contain_variable)
4154 return false;
4155 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4156 if (aglat->offset == src->offset)
4157 return (aglat->is_single_const ()
4158 && values_equal_for_ipcp_p (src->val->value,
4159 aglat->values->value));
4161 return false;
4165 /* Return true if edge CS does bring about the value described by SRC to
4166 DST_VAL of node DEST or its clone for all contexts.  Overload for
     polymorphic call contexts; the destination value itself is unused. */
4168 static bool
4169 cgraph_edge_brings_value_p (cgraph_edge *cs,
4170 ipcp_value_source<ipa_polymorphic_call_context> *src,
4171 cgraph_node *dest,
4172 ipcp_value<ipa_polymorphic_call_context> *)
4174 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4176 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4177 || caller_info->node_dead)
4178 return false;
4179 if (!src->val)
4180 return true;
4182 if (caller_info->ipcp_orig_node)
4183 return (caller_info->known_contexts.length () > (unsigned) src->index)
4184 && values_equal_for_ipcp_p (src->val->value,
4185 caller_info->known_contexts[src->index]);
4187 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4188 src->index);
4189 return plats->ctxlat.is_single_const ()
4190 && values_equal_for_ipcp_p (src->val->value,
4191 plats->ctxlat.values->value);
4194 /* Get the next clone in the linked list of clones of an edge, or NULL if CS
     has no summary or is the last clone. */
4196 static inline struct cgraph_edge *
4197 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4199 edge_clone_summary *s = edge_clone_summaries->get (cs);
4200 return s != NULL ? s->next_clone : NULL;
4203 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4204 of them is viable and hot, return true.  In that case, for those that still
4205 hold, add their edge frequencies into *FREQ_SUM, their profile counts into
4206 *COUNT_SUM and their number into *CALLER_COUNT. */
4208 template <typename valtype>
4209 static bool
4210 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4211 sreal *freq_sum, profile_count *count_sum,
4212 int *caller_count)
4214 ipcp_value_source<valtype> *src;
4215 sreal freq = 0;
4216 int count = 0;
4217 profile_count cnt = profile_count::zero ();
4218 bool hot = false;
4219 bool non_self_recursive = false;
4221 for (src = val->sources; src; src = src->next)
4223 struct cgraph_edge *cs = src->cs;
/* Walk the source edge and all its clones. */
4224 while (cs)
4226 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4228 count++;
4229 freq += cs->sreal_frequency ();
4230 if (cs->count.ipa ().initialized_p ())
4231 cnt += cs->count.ipa ();
4232 hot |= cs->maybe_hot_p ();
4233 if (cs->caller != dest)
4234 non_self_recursive = true;
4236 cs = get_next_cgraph_edge_clone (cs);
4240 /* If the only edges bringing a value are self-recursive ones, do not bother
4241 evaluating it. */
4242 if (!non_self_recursive)
4243 return false;
4245 *freq_sum = freq;
4246 *count_sum = cnt;
4247 *caller_count = count;
4249 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4251 struct cgraph_edge *cs;
4253 /* Cold non-SCC source edge could trigger hot recursive execution of
4254 function.  Consider the case as hot and rely on following cost model
4255 computation to further select right one. */
4256 for (cs = dest->callers; cs; cs = cs->next_caller)
4257 if (cs->caller == dest && cs->maybe_hot_p ())
4258 return true;
4261 return hot;
4264 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4265 to let a non-self-recursive caller be the first element.  Thus, we can
4266 simplify intersecting operations on values that arrive from all of these
4267 callers, especially when there exists self-recursive call.  Return true if
4268 this kind of adjustment is possible. */
4270 static bool
4271 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4272 cgraph_node *node)
4274 for (unsigned i = 0; i < callers.length (); i++)
4276 cgraph_edge *cs = callers[i];
/* Found a non-self-recursive caller: swap it to the front if needed. */
4278 if (cs->caller != node)
4280 if (i > 0)
4282 callers[i] = callers[0];
4283 callers[0] = cs;
4285 return true;
4288 return false;
4291 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4292 is assumed their number is known and equal to CALLER_COUNT. */
4294 template <typename valtype>
4295 static vec<cgraph_edge *>
4296 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4297 int caller_count)
4299 ipcp_value_source<valtype> *src;
4300 vec<cgraph_edge *> ret;
4302 ret.create (caller_count);
4303 for (src = val->sources; src; src = src->next)
4305 struct cgraph_edge *cs = src->cs;
4306 while (cs)
4308 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4309 ret.quick_push (cs);
4310 cs = get_next_cgraph_edge_clone (cs);
4314 if (caller_count > 1)
4315 adjust_callers_for_value_intersection (ret, dest);
4317 return ret;
4320 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4321 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4322 should be set to true when the reference created for the constant should be
4323 a load one and not an address one because the corresponding parameter p is
4324 only used as *p. */
4326 static struct ipa_replace_map *
4327 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4328 bool force_load_ref)
4330 struct ipa_replace_map *replace_map;
4332 replace_map = ggc_alloc<ipa_replace_map> ();
4333 if (dump_file)
4335 fprintf (dump_file, " replacing ");
4336 ipa_dump_param (dump_file, info, parm_num);
4338 fprintf (dump_file, " with const ");
4339 print_generic_expr (dump_file, value);
4341 if (force_load_ref)
4342 fprintf (dump_file, " - forcing load reference\n");
4343 else
4344 fprintf (dump_file, "\n");
4346 replace_map->parm_num = parm_num;
4347 replace_map->new_tree = value;
4348 replace_map->force_load_ref = force_load_ref;
4349 return replace_map;
4352 /* Dump new profiling counts */
4354 static void
4355 dump_profile_updates (struct cgraph_node *orig_node,
4356 struct cgraph_node *new_node)
4358 struct cgraph_edge *cs;
4360 fprintf (dump_file, " setting count of the specialized node to ");
4361 new_node->count.dump (dump_file);
4362 fprintf (dump_file, "\n");
4363 for (cs = new_node->callees; cs; cs = cs->next_callee)
4365 fprintf (dump_file, " edge to %s has count ",
4366 cs->callee->dump_name ());
4367 cs->count.dump (dump_file);
4368 fprintf (dump_file, "\n");
4371 fprintf (dump_file, " setting count of the original node to ");
4372 orig_node->count.dump (dump_file);
4373 fprintf (dump_file, "\n");
4374 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4376 fprintf (dump_file, " edge to %s is left with ",
4377 cs->callee->dump_name ());
4378 cs->count.dump (dump_file);
4379 fprintf (dump_file, "\n");
4383 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4384 their profile information to reflect this. */
4386 static void
4387 update_profiling_info (struct cgraph_node *orig_node,
4388 struct cgraph_node *new_node)
4390 struct cgraph_edge *cs;
4391 struct caller_statistics stats;
4392 profile_count new_sum, orig_sum;
4393 profile_count remainder, orig_node_count = orig_node->count;
4394 profile_count orig_new_node_count = new_node->count;
4396 if (!(orig_node_count.ipa () > profile_count::zero ()))
4397 return;
4399 init_caller_stats (&stats);
4400 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4401 false);
4402 orig_sum = stats.count_sum;
4403 init_caller_stats (&stats);
4404 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4405 false);
4406 new_sum = stats.count_sum;
4408 if (orig_node_count < orig_sum + new_sum)
4410 if (dump_file)
4412 fprintf (dump_file, " Problem: node %s has too low count ",
4413 orig_node->dump_name ());
4414 orig_node_count.dump (dump_file);
4415 fprintf (dump_file, "while the sum of incoming count is ");
4416 (orig_sum + new_sum).dump (dump_file);
4417 fprintf (dump_file, "\n");
4420 orig_node_count = (orig_sum + new_sum).apply_scale (12, 10);
4421 if (dump_file)
4423 fprintf (dump_file, " proceeding by pretending it was ");
4424 orig_node_count.dump (dump_file);
4425 fprintf (dump_file, "\n");
4429 remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
4430 - new_sum.ipa ());
4432 /* With partial train run we do not want to assume that original's
4433 count is zero whenever we redurect all executed edges to clone.
4434 Simply drop profile to local one in this case. */
4435 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4436 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4437 && flag_profile_partial_training)
4438 remainder = remainder.guessed_local ();
4440 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4441 new_node->count = new_sum;
4442 orig_node->count = remainder;
4444 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4445 for (cs = new_node->callees; cs; cs = cs->next_callee)
4446 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4447 for (cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4448 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4450 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4451 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4452 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4453 for (cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4454 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4456 if (dump_file)
4457 dump_profile_updates (orig_node, new_node);
4460 /* Update the respective profile of specialized NEW_NODE and the original
4461 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4462 have been redirected to the specialized version. */
4464 static void
4465 update_specialized_profile (struct cgraph_node *new_node,
4466 struct cgraph_node *orig_node,
4467 profile_count redirected_sum)
4469 struct cgraph_edge *cs;
4470 profile_count new_node_count, orig_node_count = orig_node->count;
4472 if (dump_file)
4474 fprintf (dump_file, " the sum of counts of redirected edges is ");
4475 redirected_sum.dump (dump_file);
4476 fprintf (dump_file, "\n");
4478 if (!(orig_node_count > profile_count::zero ()))
4479 return;
4481 gcc_assert (orig_node_count >= redirected_sum);
4483 new_node_count = new_node->count;
4484 new_node->count += redirected_sum;
4485 orig_node->count -= redirected_sum;
4487 for (cs = new_node->callees; cs; cs = cs->next_callee)
4488 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4490 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4492 profile_count dec = cs->count.apply_scale (redirected_sum,
4493 orig_node_count);
4494 cs->count -= dec;
4497 if (dump_file)
4498 dump_profile_updates (orig_node, new_node);
4501 static void adjust_references_in_caller (cgraph_edge *cs,
4502 symtab_node *symbol, int index);
4504 /* Simple structure to pass a symbol and index (with same meaning as parameters
4505 of adjust_references_in_caller) through a void* parameter of a
4506 call_for_symbol_thunks_and_aliases callback. */
4507 struct symbol_and_index_together
4509 symtab_node *symbol;
4510 int index;
4513 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4514 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4515 static bool
4516 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4518 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4519 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4520 if (!cs->caller->thunk)
4521 adjust_references_in_caller (cs, pack->symbol, pack->index);
4522 return false;
4525 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4526 variable which is only dereferenced and which is represented by SYMBOL. See
4527 if we can remove ADDR reference in callers assosiated witht the call. */
4529 static void
4530 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4532 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4533 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4534 if (jfunc->type == IPA_JF_CONST)
4536 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4537 cs->lto_stmt_uid);
4538 if (!to_del)
4539 return;
4540 to_del->remove_reference ();
4541 if (dump_file)
4542 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4543 cs->caller->dump_name (), symbol->dump_name ());
4544 return;
4547 if (jfunc->type != IPA_JF_PASS_THROUGH
4548 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
4549 return;
4551 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4552 cgraph_node *caller = cs->caller;
4553 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4554 /* TODO: This consistency check may be too big and not really
4555 that useful. Consider removing it. */
4556 tree cst;
4557 if (caller_info->ipcp_orig_node)
4558 cst = caller_info->known_csts[fidx];
4559 else
4561 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4562 gcc_assert (lat->is_single_const ());
4563 cst = lat->values->value;
4565 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4566 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4567 == symbol));
4569 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4570 if (cuses == IPA_UNDESCRIBED_USE)
4571 return;
4572 gcc_assert (cuses > 0);
4573 cuses--;
4574 ipa_set_controlled_uses (caller_info, fidx, cuses);
4575 if (cuses)
4576 return;
4578 if (caller_info->ipcp_orig_node)
4580 /* Cloning machinery has created a reference here, we need to either
4581 remove it or change it to a read one. */
4582 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
4583 if (to_del && to_del->use == IPA_REF_ADDR)
4585 to_del->remove_reference ();
4586 if (dump_file)
4587 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4588 cs->caller->dump_name (), symbol->dump_name ());
4589 if (ipa_get_param_load_dereferenced (caller_info, fidx))
4591 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
4592 if (dump_file)
4593 fprintf (dump_file,
4594 " ...and replaced it with LOAD one.\n");
4599 symbol_and_index_together pack;
4600 pack.symbol = symbol;
4601 pack.index = fidx;
4602 if (caller->can_change_signature)
4603 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
4604 &pack, true);
4608 /* Return true if we would like to remove a parameter from NODE when cloning it
4609 with KNOWN_CSTS scalar constants. */
4611 static bool
4612 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
4614 auto_vec<bool, 16> surviving;
4615 bool filled_vec = false;
4616 ipa_node_params *info = ipa_node_params_sum->get (node);
4617 int i, count = ipa_get_param_count (info);
4619 for (i = 0; i < count; i++)
4621 if (!known_csts[i] && ipa_is_param_used (info, i))
4622 continue;
4624 if (!filled_vec)
4626 clone_info *info = clone_info::get (node);
4627 if (!info || !info->param_adjustments)
4628 return true;
4629 info->param_adjustments->get_surviving_params (&surviving);
4630 filled_vec = true;
4632 if (surviving.length() < (unsigned) i && surviving[i])
4633 return true;
4635 return false;
4638 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
4639 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
4640 redirect all edges in CALLERS to it. */
4642 static struct cgraph_node *
4643 create_specialized_node (struct cgraph_node *node,
4644 vec<tree> known_csts,
4645 vec<ipa_polymorphic_call_context> known_contexts,
4646 struct ipa_agg_replacement_value *aggvals,
4647 vec<cgraph_edge *> &callers)
4649 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
4650 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
4651 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
4652 struct ipa_agg_replacement_value *av;
4653 struct cgraph_node *new_node;
4654 int i, count = ipa_get_param_count (info);
4655 clone_info *cinfo = clone_info::get (node);
4656 ipa_param_adjustments *old_adjustments = cinfo
4657 ? cinfo->param_adjustments : NULL;
4658 ipa_param_adjustments *new_adjustments;
4659 gcc_assert (!info->ipcp_orig_node);
4660 gcc_assert (node->can_change_signature
4661 || !old_adjustments);
4663 if (old_adjustments)
4665 /* At the moment all IPA optimizations should use the number of
4666 parameters of the prevailing decl as the m_always_copy_start.
4667 Handling any other value would complicate the code below, so for the
4668 time bing let's only assert it is so. */
4669 gcc_assert (old_adjustments->m_always_copy_start == count
4670 || old_adjustments->m_always_copy_start < 0);
4671 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
4672 for (i = 0; i < old_adj_count; i++)
4674 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
4675 if (!node->can_change_signature
4676 || old_adj->op != IPA_PARAM_OP_COPY
4677 || (!known_csts[old_adj->base_index]
4678 && ipa_is_param_used (info, old_adj->base_index)))
4680 ipa_adjusted_param new_adj = *old_adj;
4682 new_adj.prev_clone_adjustment = true;
4683 new_adj.prev_clone_index = i;
4684 vec_safe_push (new_params, new_adj);
4687 bool skip_return = old_adjustments->m_skip_return;
4688 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
4689 ipa_param_adjustments (new_params, count,
4690 skip_return));
4692 else if (node->can_change_signature
4693 && want_remove_some_param_p (node, known_csts))
4695 ipa_adjusted_param adj;
4696 memset (&adj, 0, sizeof (adj));
4697 adj.op = IPA_PARAM_OP_COPY;
4698 for (i = 0; i < count; i++)
4699 if (!known_csts[i] && ipa_is_param_used (info, i))
4701 adj.base_index = i;
4702 adj.prev_clone_index = i;
4703 vec_safe_push (new_params, adj);
4705 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
4706 ipa_param_adjustments (new_params, count, false));
4708 else
4709 new_adjustments = NULL;
4711 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
4712 for (i = 0; i < count; i++)
4714 tree t = known_csts[i];
4715 if (!t)
4716 continue;
4718 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
4720 bool load_ref = false;
4721 symtab_node *ref_symbol;
4722 if (TREE_CODE (t) == ADDR_EXPR)
4724 tree base = get_base_address (TREE_OPERAND (t, 0));
4725 if (TREE_CODE (base) == VAR_DECL
4726 && ipa_get_controlled_uses (info, i) == 0
4727 && ipa_get_param_load_dereferenced (info, i)
4728 && (ref_symbol = symtab_node::get (base)))
4730 load_ref = true;
4731 if (node->can_change_signature)
4732 for (cgraph_edge *caller : callers)
4733 adjust_references_in_caller (caller, ref_symbol, i);
4737 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
4738 if (replace_map)
4739 vec_safe_push (replace_trees, replace_map);
4741 auto_vec<cgraph_edge *, 2> self_recursive_calls;
4742 for (i = callers.length () - 1; i >= 0; i--)
4744 cgraph_edge *cs = callers[i];
4745 if (cs->caller == node)
4747 self_recursive_calls.safe_push (cs);
4748 callers.unordered_remove (i);
4752 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
4753 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4754 node->decl)));
4755 new_node = node->create_virtual_clone (callers, replace_trees,
4756 new_adjustments, "constprop",
4757 suffix_counter);
4758 suffix_counter++;
4760 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
4761 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
4763 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
4764 /* Cloned edges can disappear during cloning as speculation can be
4765 resolved, check that we have one and that it comes from the last
4766 cloning. */
4767 if (cs && cs->caller == new_node)
4768 cs->redirect_callee_duplicating_thunks (new_node);
4769 /* Any future code that would make more than one clone of an outgoing
4770 edge would confuse this mechanism, so let's check that does not
4771 happen. */
4772 gcc_checking_assert (!cs
4773 || !get_next_cgraph_edge_clone (cs)
4774 || get_next_cgraph_edge_clone (cs)->caller != new_node);
4776 if (have_self_recursive_calls)
4777 new_node->expand_all_artificial_thunks ();
4779 ipa_set_node_agg_value_chain (new_node, aggvals);
4780 for (av = aggvals; av; av = av->next)
4781 new_node->maybe_create_reference (av->value, NULL);
4783 if (dump_file && (dump_flags & TDF_DETAILS))
4785 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
4786 if (known_contexts.exists ())
4788 for (i = 0; i < count; i++)
4789 if (!known_contexts[i].useless_p ())
4791 fprintf (dump_file, " known ctx %i is ", i);
4792 known_contexts[i].dump (dump_file);
4795 if (aggvals)
4796 ipa_dump_agg_replacement_values (dump_file, aggvals);
4798 ipa_check_create_node_params ();
4799 update_profiling_info (node, new_node);
4800 new_info = ipa_node_params_sum->get (new_node);
4801 new_info->ipcp_orig_node = node;
4802 new_node->ipcp_clone = true;
4803 new_info->known_csts = known_csts;
4804 new_info->known_contexts = known_contexts;
4806 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
4808 return new_node;
4811 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
4812 pass-through function to itself when the cgraph_node involved is not an
4813 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
4814 no-operation pass-through. */
4816 static bool
4817 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
4818 bool simple = true)
4820 enum availability availability;
4821 if (cs->caller == cs->callee->function_symbol (&availability)
4822 && availability > AVAIL_INTERPOSABLE
4823 && jfunc->type == IPA_JF_PASS_THROUGH
4824 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4825 && ipa_get_jf_pass_through_formal_id (jfunc) == i
4826 && ipa_node_params_sum->get (cs->caller)
4827 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
4828 return true;
4829 return false;
4832 /* Return true if JFUNC, which describes a part of an aggregate represented or
4833 pointed to by the i-th parameter of call CS, is a pass-through function to
4834 itself when the cgraph_node involved is not an IPA-CP clone.. When
4835 SIMPLE is true, further check if JFUNC is a simple no-operation
4836 pass-through. */
4838 static bool
4839 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
4840 int i, bool simple = true)
4842 enum availability availability;
4843 if (cs->caller == cs->callee->function_symbol (&availability)
4844 && availability > AVAIL_INTERPOSABLE
4845 && jfunc->jftype == IPA_JF_LOAD_AGG
4846 && jfunc->offset == jfunc->value.load_agg.offset
4847 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
4848 && jfunc->value.pass_through.formal_id == i
4849 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
4850 && ipa_node_params_sum->get (cs->caller)
4851 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
4852 return true;
4853 return false;
4856 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
4857 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
4859 static void
4860 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
4861 vec<tree> &known_csts,
4862 const vec<cgraph_edge *> &callers)
4864 ipa_node_params *info = ipa_node_params_sum->get (node);
4865 int i, count = ipa_get_param_count (info);
4867 for (i = 0; i < count; i++)
4869 struct cgraph_edge *cs;
4870 tree newval = NULL_TREE;
4871 int j;
4872 bool first = true;
4873 tree type = ipa_get_type (info, i);
4875 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
4876 continue;
4878 FOR_EACH_VEC_ELT (callers, j, cs)
4880 struct ipa_jump_func *jump_func;
4881 tree t;
4883 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4884 if (!args
4885 || i >= ipa_get_cs_argument_count (args)
4886 || (i == 0
4887 && call_passes_through_thunk (cs)))
4889 newval = NULL_TREE;
4890 break;
4892 jump_func = ipa_get_ith_jump_func (args, i);
4894 /* Besides simple pass-through jump function, arithmetic jump
4895 function could also introduce argument-direct-pass-through for
4896 self-feeding recursive call. For example,
4898 fn (int i)
4900 fn (i & 1);
4903 Given that i is 0, recursive propagation via (i & 1) also gets
4904 0. */
4905 if (self_recursive_pass_through_p (cs, jump_func, i, false))
4907 gcc_assert (newval);
4908 t = ipa_get_jf_arith_result (
4909 ipa_get_jf_pass_through_operation (jump_func),
4910 newval,
4911 ipa_get_jf_pass_through_operand (jump_func),
4912 type);
4914 else
4915 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
4916 jump_func, type);
4917 if (!t
4918 || (newval
4919 && !values_equal_for_ipcp_p (t, newval))
4920 || (!first && !newval))
4922 newval = NULL_TREE;
4923 break;
4925 else
4926 newval = t;
4927 first = false;
4930 if (newval)
4932 if (dump_file && (dump_flags & TDF_DETAILS))
4934 fprintf (dump_file, " adding an extra known scalar value ");
4935 print_ipcp_constant_value (dump_file, newval);
4936 fprintf (dump_file, " for ");
4937 ipa_dump_param (dump_file, info, i);
4938 fprintf (dump_file, "\n");
4941 known_csts[i] = newval;
4946 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
4947 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
4948 CALLERS. */
4950 static void
4951 find_more_contexts_for_caller_subset (cgraph_node *node,
4952 vec<ipa_polymorphic_call_context>
4953 *known_contexts,
4954 const vec<cgraph_edge *> &callers)
4956 ipa_node_params *info = ipa_node_params_sum->get (node);
4957 int i, count = ipa_get_param_count (info);
4959 for (i = 0; i < count; i++)
4961 cgraph_edge *cs;
4963 if (ipa_get_poly_ctx_lat (info, i)->bottom
4964 || (known_contexts->exists ()
4965 && !(*known_contexts)[i].useless_p ()))
4966 continue;
4968 ipa_polymorphic_call_context newval;
4969 bool first = true;
4970 int j;
4972 FOR_EACH_VEC_ELT (callers, j, cs)
4974 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4975 if (!args
4976 || i >= ipa_get_cs_argument_count (args))
4977 return;
4978 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4979 ipa_polymorphic_call_context ctx;
4980 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
4981 cs, i, jfunc);
4982 if (first)
4984 newval = ctx;
4985 first = false;
4987 else
4988 newval.meet_with (ctx);
4989 if (newval.useless_p ())
4990 break;
4993 if (!newval.useless_p ())
4995 if (dump_file && (dump_flags & TDF_DETAILS))
4997 fprintf (dump_file, " adding an extra known polymorphic "
4998 "context ");
4999 print_ipcp_constant_value (dump_file, newval);
5000 fprintf (dump_file, " for ");
5001 ipa_dump_param (dump_file, info, i);
5002 fprintf (dump_file, "\n");
5005 if (!known_contexts->exists ())
5006 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5007 true);
5008 (*known_contexts)[i] = newval;
5014 /* Go through PLATS and create a vector of values consisting of values and
5015 offsets (minus OFFSET) of lattices that contain only a single value. */
5017 static vec<ipa_agg_value>
5018 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5020 vec<ipa_agg_value> res = vNULL;
5022 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5023 return vNULL;
5025 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5026 if (aglat->is_single_const ())
5028 struct ipa_agg_value ti;
5029 ti.offset = aglat->offset - offset;
5030 ti.value = aglat->values->value;
5031 res.safe_push (ti);
5033 return res;
5036 /* Intersect all values in INTER with single value lattices in PLATS (while
5037 subtracting OFFSET). */
5039 static void
5040 intersect_with_plats (class ipcp_param_lattices *plats,
5041 vec<ipa_agg_value> *inter,
5042 HOST_WIDE_INT offset)
5044 struct ipcp_agg_lattice *aglat;
5045 struct ipa_agg_value *item;
5046 int k;
5048 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5050 inter->release ();
5051 return;
5054 aglat = plats->aggs;
5055 FOR_EACH_VEC_ELT (*inter, k, item)
5057 bool found = false;
5058 if (!item->value)
5059 continue;
5060 while (aglat)
5062 if (aglat->offset - offset > item->offset)
5063 break;
5064 if (aglat->offset - offset == item->offset)
5066 if (aglat->is_single_const ())
5068 tree value = aglat->values->value;
5070 if (values_equal_for_ipcp_p (item->value, value))
5071 found = true;
5073 break;
5075 aglat = aglat->next;
5077 if (!found)
5078 item->value = NULL_TREE;
5082 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5083 vector result while subtracting OFFSET from the individual value offsets. */
5085 static vec<ipa_agg_value>
5086 agg_replacements_to_vector (struct cgraph_node *node, int index,
5087 HOST_WIDE_INT offset)
5089 struct ipa_agg_replacement_value *av;
5090 vec<ipa_agg_value> res = vNULL;
5092 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5093 if (av->index == index
5094 && (av->offset - offset) >= 0)
5096 struct ipa_agg_value item;
5097 gcc_checking_assert (av->value);
5098 item.offset = av->offset - offset;
5099 item.value = av->value;
5100 res.safe_push (item);
5103 return res;
5106 /* Intersect all values in INTER with those that we have already scheduled to
5107 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5108 (while subtracting OFFSET). */
5110 static void
5111 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5112 vec<ipa_agg_value> *inter,
5113 HOST_WIDE_INT offset)
5115 struct ipa_agg_replacement_value *srcvals;
5116 struct ipa_agg_value *item;
5117 int i;
5119 srcvals = ipa_get_agg_replacements_for_node (node);
5120 if (!srcvals)
5122 inter->release ();
5123 return;
5126 FOR_EACH_VEC_ELT (*inter, i, item)
5128 struct ipa_agg_replacement_value *av;
5129 bool found = false;
5130 if (!item->value)
5131 continue;
5132 for (av = srcvals; av; av = av->next)
5134 gcc_checking_assert (av->value);
5135 if (av->index == index
5136 && av->offset - offset == item->offset)
5138 if (values_equal_for_ipcp_p (item->value, av->value))
5139 found = true;
5140 break;
5143 if (!found)
5144 item->value = NULL_TREE;
5148 /* Intersect values in INTER with aggregate values that come along edge CS to
5149 parameter number INDEX and return it. If INTER does not actually exist yet,
5150 copy all incoming values to it. If we determine we ended up with no values
5151 whatsoever, return a released vector. */
5153 static vec<ipa_agg_value>
5154 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5155 vec<ipa_agg_value> inter)
5157 struct ipa_jump_func *jfunc;
5158 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5159 if (jfunc->type == IPA_JF_PASS_THROUGH
5160 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5162 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5163 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5165 if (caller_info->ipcp_orig_node)
5167 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5168 class ipcp_param_lattices *orig_plats;
5169 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5170 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5171 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5173 if (!inter.exists ())
5174 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5175 else
5176 intersect_with_agg_replacements (cs->caller, src_idx,
5177 &inter, 0);
5178 return inter;
5181 else
5183 class ipcp_param_lattices *src_plats;
5184 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5185 if (agg_pass_through_permissible_p (src_plats, jfunc))
5187 /* Currently we do not produce clobber aggregate jump
5188 functions, adjust when we do. */
5189 gcc_checking_assert (!jfunc->agg.items);
5190 if (!inter.exists ())
5191 inter = copy_plats_to_inter (src_plats, 0);
5192 else
5193 intersect_with_plats (src_plats, &inter, 0);
5194 return inter;
5198 else if (jfunc->type == IPA_JF_ANCESTOR
5199 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5201 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5202 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5203 class ipcp_param_lattices *src_plats;
5204 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5206 if (caller_info->ipcp_orig_node)
5208 if (!inter.exists ())
5209 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5210 else
5211 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5212 delta);
5214 else
5216 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5217 /* Currently we do not produce clobber aggregate jump
5218 functions, adjust when we do. */
5219 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5220 if (!inter.exists ())
5221 inter = copy_plats_to_inter (src_plats, delta);
5222 else
5223 intersect_with_plats (src_plats, &inter, delta);
5225 return inter;
5228 if (jfunc->agg.items)
5230 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5231 struct ipa_agg_value *item;
5232 int k;
5234 if (!inter.exists ())
5235 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5237 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5238 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5239 agg_item);
5240 if (value)
5242 struct ipa_agg_value agg_value;
5244 agg_value.value = value;
5245 agg_value.offset = agg_item->offset;
5246 inter.safe_push (agg_value);
5249 else
5250 FOR_EACH_VEC_ELT (inter, k, item)
5252 int l = 0;
5253 bool found = false;
5255 if (!item->value)
5256 continue;
5258 while ((unsigned) l < jfunc->agg.items->length ())
5260 struct ipa_agg_jf_item *ti;
5261 ti = &(*jfunc->agg.items)[l];
5262 if (ti->offset > item->offset)
5263 break;
5264 if (ti->offset == item->offset)
5266 tree value;
5268 /* Besides simple pass-through aggregate jump function,
5269 arithmetic aggregate jump function could also bring
5270 same aggregate value as parameter passed-in for
5271 self-feeding recursive call. For example,
5273 fn (int *i)
5275 int j = *i & 1;
5276 fn (&j);
5279 Given that *i is 0, recursive propagation via (*i & 1)
5280 also gets 0. */
5281 if (self_recursive_agg_pass_through_p (cs, ti, index,
5282 false))
5283 value = ipa_get_jf_arith_result (
5284 ti->value.pass_through.operation,
5285 item->value,
5286 ti->value.pass_through.operand,
5287 ti->type);
5288 else
5289 value = ipa_agg_value_from_node (caller_info,
5290 cs->caller, ti);
5292 if (value && values_equal_for_ipcp_p (item->value, value))
5293 found = true;
5294 break;
5296 l++;
5298 if (!found)
5299 item->value = NULL;
5302 else
5304 inter.release ();
5305 return vNULL;
5307 return inter;
5310 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5311 from all of them. */
5313 static struct ipa_agg_replacement_value *
5314 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5315 const vec<cgraph_edge *> &callers)
5317 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5318 struct ipa_agg_replacement_value *res;
5319 struct ipa_agg_replacement_value **tail = &res;
5320 struct cgraph_edge *cs;
5321 int i, j, count = ipa_get_param_count (dest_info);
5323 FOR_EACH_VEC_ELT (callers, j, cs)
5325 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5326 if (!args)
5328 count = 0;
5329 break;
5331 int c = ipa_get_cs_argument_count (args);
5332 if (c < count)
5333 count = c;
5336 for (i = 0; i < count; i++)
5338 struct cgraph_edge *cs;
5339 vec<ipa_agg_value> inter = vNULL;
5340 struct ipa_agg_value *item;
5341 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5342 int j;
5344 /* Among other things, the following check should deal with all by_ref
5345 mismatches. */
5346 if (plats->aggs_bottom)
5347 continue;
5349 FOR_EACH_VEC_ELT (callers, j, cs)
5351 struct ipa_jump_func *jfunc
5352 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5353 if (self_recursive_pass_through_p (cs, jfunc, i)
5354 && (!plats->aggs_by_ref
5355 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5356 continue;
5357 inter = intersect_aggregates_with_edge (cs, i, inter);
5359 if (!inter.exists ())
5360 goto next_param;
5363 FOR_EACH_VEC_ELT (inter, j, item)
5365 struct ipa_agg_replacement_value *v;
5367 if (!item->value)
5368 continue;
5370 v = ggc_alloc<ipa_agg_replacement_value> ();
5371 v->index = i;
5372 v->offset = item->offset;
5373 v->value = item->value;
5374 v->by_ref = plats->aggs_by_ref;
5375 *tail = v;
5376 tail = &v->next;
5379 next_param:
5380 if (inter.exists ())
5381 inter.release ();
5383 *tail = NULL;
5384 return res;
5387 /* Determine whether CS also brings all scalar values that the NODE is
5388 specialized for. */
5390 static bool
5391 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5392 struct cgraph_node *node)
5394 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5395 int count = ipa_get_param_count (dest_info);
5396 class ipa_node_params *caller_info;
5397 class ipa_edge_args *args;
5398 int i;
5400 caller_info = ipa_node_params_sum->get (cs->caller);
5401 args = ipa_edge_args_sum->get (cs);
5402 for (i = 0; i < count; i++)
5404 struct ipa_jump_func *jump_func;
5405 tree val, t;
5407 val = dest_info->known_csts[i];
5408 if (!val)
5409 continue;
5411 if (i >= ipa_get_cs_argument_count (args))
5412 return false;
5413 jump_func = ipa_get_ith_jump_func (args, i);
5414 t = ipa_value_from_jfunc (caller_info, jump_func,
5415 ipa_get_type (dest_info, i));
5416 if (!t || !values_equal_for_ipcp_p (val, t))
5417 return false;
5419 return true;
5422 /* Determine whether CS also brings all aggregate values that NODE is
5423 specialized for. */
5424 static bool
5425 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5426 struct cgraph_node *node)
5428 struct ipa_agg_replacement_value *aggval;
5429 int i, ec, count;
5431 aggval = ipa_get_agg_replacements_for_node (node);
5432 if (!aggval)
5433 return true;
5435 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5436 count = ipa_get_param_count (clone_node_info);
5437 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5438 if (ec < count)
5439 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5440 if (aggval->index >= ec)
5441 return false;
5443 ipa_node_params *orig_node_info
5444 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5446 for (i = 0; i < count; i++)
5448 class ipcp_param_lattices *plats;
5449 bool interesting = false;
5450 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5451 if (aggval->index == i)
5453 interesting = true;
5454 break;
5456 if (!interesting)
5457 continue;
5459 plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
5460 if (plats->aggs_bottom)
5461 return false;
5463 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5464 if (!values.exists ())
5465 return false;
5467 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5468 if (aggval->index == i)
5470 struct ipa_agg_value *item;
5471 int j;
5472 bool found = false;
5473 FOR_EACH_VEC_ELT (values, j, item)
5474 if (item->value
5475 && item->offset == av->offset
5476 && values_equal_for_ipcp_p (item->value, av->value))
5478 found = true;
5479 break;
5481 if (!found)
5483 values.release ();
5484 return false;
5487 values.release ();
5489 return true;
5492 /* Given an original NODE and a VAL for which we have already created a
5493 specialized clone, look whether there are incoming edges that still lead
5494 into the old node but now also bring the requested value and also conform to
5495 all other criteria such that they can be redirected the special node.
5496 This function can therefore redirect the final edge in a SCC. */
5498 template <typename valtype>
5499 static void
5500 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5502 ipcp_value_source<valtype> *src;
5503 profile_count redirected_sum = profile_count::zero ();
5505 for (src = val->sources; src; src = src->next)
5507 struct cgraph_edge *cs = src->cs;
5508 while (cs)
5510 if (cgraph_edge_brings_value_p (cs, src, node, val)
5511 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5512 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5514 if (dump_file)
5515 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5516 cs->caller->dump_name (),
5517 val->spec_node->dump_name ());
5519 cs->redirect_callee_duplicating_thunks (val->spec_node);
5520 val->spec_node->expand_all_artificial_thunks ();
5521 if (cs->count.ipa ().initialized_p ())
5522 redirected_sum = redirected_sum + cs->count.ipa ();
5524 cs = get_next_cgraph_edge_clone (cs);
5528 if (redirected_sum.nonzero_p ())
5529 update_specialized_profile (val->spec_node, node, redirected_sum);
5532 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5534 static bool
5535 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5537 ipa_polymorphic_call_context *ctx;
5538 int i;
5540 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5541 if (!ctx->useless_p ())
5542 return true;
5543 return false;
5546 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5548 static vec<ipa_polymorphic_call_context>
5549 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5551 if (known_contexts_useful_p (known_contexts))
5552 return known_contexts.copy ();
5553 else
5554 return vNULL;
5557 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5558 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5559 copy too. */
5561 static void
5562 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5563 vec<tree> *known_csts,
5564 vec<ipa_polymorphic_call_context> *known_contexts,
5565 ipcp_value<tree> *val, int index)
5567 *known_csts = avals->m_known_vals.copy ();
5568 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5569 (*known_csts)[index] = val->value;
5572 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5573 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5574 INDEX. */
5576 static void
5577 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5578 vec<tree> *known_csts,
5579 vec<ipa_polymorphic_call_context> *known_contexts,
5580 ipcp_value<ipa_polymorphic_call_context> *val,
5581 int index)
5583 *known_csts = avals->m_known_vals.copy ();
5584 *known_contexts = avals->m_known_contexts.copy ();
5585 (*known_contexts)[index] = val->value;
5588 /* Return true if OFFSET indicates this was not an aggregate value or there is
5589 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5590 AGGVALS list. */
5592 DEBUG_FUNCTION bool
5593 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
5594 int index, HOST_WIDE_INT offset, tree value)
5596 if (offset == -1)
5597 return true;
5599 while (aggvals)
5601 if (aggvals->index == index
5602 && aggvals->offset == offset
5603 && values_equal_for_ipcp_p (aggvals->value, value))
5604 return true;
5605 aggvals = aggvals->next;
5607 return false;
5610 /* Return true if offset is minus one because source of a polymorphic context
5611 cannot be an aggregate value. */
5613 DEBUG_FUNCTION bool
5614 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
5615 int , HOST_WIDE_INT offset,
5616 ipa_polymorphic_call_context)
5618 return offset == -1;
5621 /* Decide whether to create a special version of NODE for value VAL of
5622 parameter at the given INDEX. If OFFSET is -1, the value is for the
5623 parameter itself, otherwise it is stored at the given OFFSET of the
5624 parameter. AVALS describes the other already known values. */
5626 template <typename valtype>
5627 static bool
5628 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5629 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals)
5631 struct ipa_agg_replacement_value *aggvals;
5632 int caller_count;
5633 sreal freq_sum;
5634 profile_count count_sum;
5635 vec<cgraph_edge *> callers;
5637 if (val->spec_node)
5639 perhaps_add_new_callers (node, val);
5640 return false;
5642 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5644 if (dump_file && (dump_flags & TDF_DETAILS))
5645 fprintf (dump_file, " Ignoring candidate value because "
5646 "maximum unit size would be reached with %li.\n",
5647 val->local_size_cost + overall_size);
5648 return false;
5650 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum,
5651 &caller_count))
5652 return false;
5654 if (!dbg_cnt (ipa_cp_values))
5655 return false;
5657 if (dump_file && (dump_flags & TDF_DETAILS))
5659 fprintf (dump_file, " - considering value ");
5660 print_ipcp_constant_value (dump_file, val->value);
5661 fprintf (dump_file, " for ");
5662 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
5663 if (offset != -1)
5664 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
5665 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
5668 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
5669 freq_sum, count_sum,
5670 val->local_size_cost)
5671 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
5672 freq_sum, count_sum, val->prop_size_cost))
5673 return false;
5675 if (dump_file)
5676 fprintf (dump_file, " Creating a specialized node of %s.\n",
5677 node->dump_name ());
5679 vec<tree> known_csts;
5680 vec<ipa_polymorphic_call_context> known_contexts;
5682 callers = gather_edges_for_value (val, node, caller_count);
5683 if (offset == -1)
5684 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
5685 else
5687 known_csts = avals->m_known_vals.copy ();
5688 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5690 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5691 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5692 aggvals = find_aggregate_values_for_callers_subset (node, callers);
5693 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
5694 offset, val->value));
5695 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
5696 aggvals, callers);
5697 callers.release ();
5698 overall_size += val->local_size_cost;
5699 if (dump_file && (dump_flags & TDF_DETAILS))
5700 fprintf (dump_file, " overall size reached %li\n",
5701 overall_size);
5703 /* TODO: If for some lattice there is only one other known value
5704 left, make a special node for it too. */
5706 return true;
5709 /* Decide whether and what specialized clones of NODE should be created. */
5711 static bool
5712 decide_whether_version_node (struct cgraph_node *node)
5714 ipa_node_params *info = ipa_node_params_sum->get (node);
5715 int i, count = ipa_get_param_count (info);
5716 bool ret = false;
5718 if (count == 0)
5719 return false;
5721 if (dump_file && (dump_flags & TDF_DETAILS))
5722 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
5723 node->dump_name ());
5725 ipa_auto_call_arg_values avals;
5726 gather_context_independent_values (info, &avals, false, NULL);
5728 for (i = 0; i < count;i++)
5730 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5731 ipcp_lattice<tree> *lat = &plats->itself;
5732 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
5734 if (!lat->bottom
5735 && !avals.m_known_vals[i])
5737 ipcp_value<tree> *val;
5738 for (val = lat->values; val; val = val->next)
5739 ret |= decide_about_value (node, i, -1, val, &avals);
5742 if (!plats->aggs_bottom)
5744 struct ipcp_agg_lattice *aglat;
5745 ipcp_value<tree> *val;
5746 for (aglat = plats->aggs; aglat; aglat = aglat->next)
5747 if (!aglat->bottom && aglat->values
5748 /* If the following is false, the one value has been considered
5749 for cloning for all contexts. */
5750 && (plats->aggs_contain_variable
5751 || !aglat->is_single_const ()))
5752 for (val = aglat->values; val; val = val->next)
5753 ret |= decide_about_value (node, i, aglat->offset, val, &avals);
5756 if (!ctxlat->bottom
5757 && avals.m_known_contexts[i].useless_p ())
5759 ipcp_value<ipa_polymorphic_call_context> *val;
5760 for (val = ctxlat->values; val; val = val->next)
5761 ret |= decide_about_value (node, i, -1, val, &avals);
5765 if (info->do_clone_for_all_contexts)
5767 if (!dbg_cnt (ipa_cp_values))
5769 info->do_clone_for_all_contexts = false;
5770 return ret;
5773 struct cgraph_node *clone;
5774 auto_vec<cgraph_edge *> callers = node->collect_callers ();
5776 for (int i = callers.length () - 1; i >= 0; i--)
5778 cgraph_edge *cs = callers[i];
5779 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5781 if (caller_info && caller_info->node_dead)
5782 callers.unordered_remove (i);
5785 if (!adjust_callers_for_value_intersection (callers, node))
5787 /* If node is not called by anyone, or all its caller edges are
5788 self-recursive, the node is not really in use, no need to do
5789 cloning. */
5790 info->do_clone_for_all_contexts = false;
5791 return ret;
5794 if (dump_file)
5795 fprintf (dump_file, " - Creating a specialized node of %s "
5796 "for all known contexts.\n", node->dump_name ());
5798 vec<tree> known_csts = avals.m_known_vals.copy ();
5799 vec<ipa_polymorphic_call_context> known_contexts
5800 = copy_useful_known_contexts (avals.m_known_contexts);
5801 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5802 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5803 ipa_agg_replacement_value *aggvals
5804 = find_aggregate_values_for_callers_subset (node, callers);
5806 if (!known_contexts_useful_p (known_contexts))
5808 known_contexts.release ();
5809 known_contexts = vNULL;
5811 clone = create_specialized_node (node, known_csts, known_contexts,
5812 aggvals, callers);
5813 info->do_clone_for_all_contexts = false;
5814 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
5815 ret = true;
5818 return ret;
5821 /* Transitively mark all callees of NODE within the same SCC as not dead. */
5823 static void
5824 spread_undeadness (struct cgraph_node *node)
5826 struct cgraph_edge *cs;
5828 for (cs = node->callees; cs; cs = cs->next_callee)
5829 if (ipa_edge_within_scc (cs))
5831 struct cgraph_node *callee;
5832 class ipa_node_params *info;
5834 callee = cs->callee->function_symbol (NULL);
5835 info = ipa_node_params_sum->get (callee);
5837 if (info && info->node_dead)
5839 info->node_dead = 0;
5840 spread_undeadness (callee);
5845 /* Return true if NODE has a caller from outside of its SCC that is not
5846 dead. Worker callback for cgraph_for_node_and_aliases. */
5848 static bool
5849 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
5850 void *data ATTRIBUTE_UNUSED)
5852 struct cgraph_edge *cs;
5854 for (cs = node->callers; cs; cs = cs->next_caller)
5855 if (cs->caller->thunk
5856 && cs->caller->call_for_symbol_thunks_and_aliases
5857 (has_undead_caller_from_outside_scc_p, NULL, true))
5858 return true;
5859 else if (!ipa_edge_within_scc (cs))
5861 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5862 if (!caller_info /* Unoptimized caller are like dead ones. */
5863 || !caller_info->node_dead)
5864 return true;
5866 return false;
5870 /* Identify nodes within the same SCC as NODE which are no longer needed
5871 because of new clones and will be removed as unreachable. */
5873 static void
5874 identify_dead_nodes (struct cgraph_node *node)
5876 struct cgraph_node *v;
5877 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5878 if (v->local)
5880 ipa_node_params *info = ipa_node_params_sum->get (v);
5881 if (info
5882 && !v->call_for_symbol_thunks_and_aliases
5883 (has_undead_caller_from_outside_scc_p, NULL, true))
5884 info->node_dead = 1;
5887 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5889 ipa_node_params *info = ipa_node_params_sum->get (v);
5890 if (info && !info->node_dead)
5891 spread_undeadness (v);
5894 if (dump_file && (dump_flags & TDF_DETAILS))
5896 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5897 if (ipa_node_params_sum->get (v)
5898 && ipa_node_params_sum->get (v)->node_dead)
5899 fprintf (dump_file, " Marking node as dead: %s.\n",
5900 v->dump_name ());
5904 /* The decision stage. Iterate over the topological order of call graph nodes
5905 TOPO and make specialized clones if deemed beneficial. */
5907 static void
5908 ipcp_decision_stage (class ipa_topo_info *topo)
5910 int i;
5912 if (dump_file)
5913 fprintf (dump_file, "\nIPA decision stage:\n\n");
5915 for (i = topo->nnodes - 1; i >= 0; i--)
5917 struct cgraph_node *node = topo->order[i];
5918 bool change = false, iterate = true;
5920 while (iterate)
5922 struct cgraph_node *v;
5923 iterate = false;
5924 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5925 if (v->has_gimple_body_p ()
5926 && ipcp_versionable_function_p (v))
5927 iterate |= decide_whether_version_node (v);
5929 change |= iterate;
5931 if (change)
5932 identify_dead_nodes (node);
5936 /* Look up all the bits information that we have discovered and copy it over
5937 to the transformation summary. */
5939 static void
5940 ipcp_store_bits_results (void)
5942 cgraph_node *node;
5944 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
5946 ipa_node_params *info = ipa_node_params_sum->get (node);
5947 bool dumped_sth = false;
5948 bool found_useful_result = false;
5950 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
5952 if (dump_file)
5953 fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
5954 "; -fipa-bit-cp: disabled.\n",
5955 node->dump_name ());
5956 continue;
5959 if (info->ipcp_orig_node)
5960 info = ipa_node_params_sum->get (info->ipcp_orig_node);
5961 if (!info->lattices)
5962 /* Newly expanded artificial thunks do not have lattices. */
5963 continue;
5965 unsigned count = ipa_get_param_count (info);
5966 for (unsigned i = 0; i < count; i++)
5968 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5969 if (plats->bits_lattice.constant_p ())
5971 found_useful_result = true;
5972 break;
5976 if (!found_useful_result)
5977 continue;
5979 ipcp_transformation_initialize ();
5980 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
5981 vec_safe_reserve_exact (ts->bits, count);
5983 for (unsigned i = 0; i < count; i++)
5985 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5986 ipa_bits *jfbits;
5988 if (plats->bits_lattice.constant_p ())
5990 jfbits
5991 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
5992 plats->bits_lattice.get_mask ());
5993 if (!dbg_cnt (ipa_cp_bits))
5994 jfbits = NULL;
5996 else
5997 jfbits = NULL;
5999 ts->bits->quick_push (jfbits);
6000 if (!dump_file || !jfbits)
6001 continue;
6002 if (!dumped_sth)
6004 fprintf (dump_file, "Propagated bits info for function %s:\n",
6005 node->dump_name ());
6006 dumped_sth = true;
6008 fprintf (dump_file, " param %i: value = ", i);
6009 print_hex (jfbits->value, dump_file);
6010 fprintf (dump_file, ", mask = ");
6011 print_hex (jfbits->mask, dump_file);
6012 fprintf (dump_file, "\n");
6017 /* Look up all VR information that we have discovered and copy it over
6018 to the transformation summary. */
6020 static void
6021 ipcp_store_vr_results (void)
6023 cgraph_node *node;
6025 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6027 ipa_node_params *info = ipa_node_params_sum->get (node);
6028 bool found_useful_result = false;
6030 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6032 if (dump_file)
6033 fprintf (dump_file, "Not considering %s for VR discovery "
6034 "and propagate; -fipa-ipa-vrp: disabled.\n",
6035 node->dump_name ());
6036 continue;
6039 if (info->ipcp_orig_node)
6040 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6041 if (!info->lattices)
6042 /* Newly expanded artificial thunks do not have lattices. */
6043 continue;
6045 unsigned count = ipa_get_param_count (info);
6046 for (unsigned i = 0; i < count; i++)
6048 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6049 if (!plats->m_value_range.bottom_p ()
6050 && !plats->m_value_range.top_p ())
6052 found_useful_result = true;
6053 break;
6056 if (!found_useful_result)
6057 continue;
6059 ipcp_transformation_initialize ();
6060 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6061 vec_safe_reserve_exact (ts->m_vr, count);
6063 for (unsigned i = 0; i < count; i++)
6065 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6066 ipa_vr vr;
6068 if (!plats->m_value_range.bottom_p ()
6069 && !plats->m_value_range.top_p ()
6070 && dbg_cnt (ipa_cp_vr))
6072 vr.known = true;
6073 vr.type = plats->m_value_range.m_vr.kind ();
6074 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6075 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6077 else
6079 vr.known = false;
6080 vr.type = VR_VARYING;
6081 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6083 ts->m_vr->quick_push (vr);
6088 /* The IPCP driver. */
6090 static unsigned int
6091 ipcp_driver (void)
6093 class ipa_topo_info topo;
6095 if (edge_clone_summaries == NULL)
6096 edge_clone_summaries = new edge_clone_summary_t (symtab);
6098 ipa_check_create_node_params ();
6099 ipa_check_create_edge_args ();
6100 clone_num_suffixes = new hash_map<const char *, unsigned>;
6102 if (dump_file)
6104 fprintf (dump_file, "\nIPA structures before propagation:\n");
6105 if (dump_flags & TDF_DETAILS)
6106 ipa_print_all_params (dump_file);
6107 ipa_print_all_jump_functions (dump_file);
6110 /* Topological sort. */
6111 build_toporder_info (&topo);
6112 /* Do the interprocedural propagation. */
6113 ipcp_propagate_stage (&topo);
6114 /* Decide what constant propagation and cloning should be performed. */
6115 ipcp_decision_stage (&topo);
6116 /* Store results of bits propagation. */
6117 ipcp_store_bits_results ();
6118 /* Store results of value range propagation. */
6119 ipcp_store_vr_results ();
6121 /* Free all IPCP structures. */
6122 delete clone_num_suffixes;
6123 free_toporder_info (&topo);
6124 delete edge_clone_summaries;
6125 edge_clone_summaries = NULL;
6126 ipa_free_all_structures_after_ipa_cp ();
6127 if (dump_file)
6128 fprintf (dump_file, "\nIPA constant propagation end\n");
6129 return 0;
6132 /* Initialization and computation of IPCP data structures. This is the initial
6133 intraprocedural analysis of functions, which gathers information to be
6134 propagated later on. */
6136 static void
6137 ipcp_generate_summary (void)
6139 struct cgraph_node *node;
6141 if (dump_file)
6142 fprintf (dump_file, "\nIPA constant propagation start:\n");
6143 ipa_register_cgraph_hooks ();
6145 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6146 ipa_analyze_node (node);
6149 namespace {
6151 const pass_data pass_data_ipa_cp =
6153 IPA_PASS, /* type */
6154 "cp", /* name */
6155 OPTGROUP_NONE, /* optinfo_flags */
6156 TV_IPA_CONSTANT_PROP, /* tv_id */
6157 0, /* properties_required */
6158 0, /* properties_provided */
6159 0, /* properties_destroyed */
6160 0, /* todo_flags_start */
6161 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6164 class pass_ipa_cp : public ipa_opt_pass_d
6166 public:
6167 pass_ipa_cp (gcc::context *ctxt)
6168 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6169 ipcp_generate_summary, /* generate_summary */
6170 NULL, /* write_summary */
6171 NULL, /* read_summary */
6172 ipcp_write_transformation_summaries, /*
6173 write_optimization_summary */
6174 ipcp_read_transformation_summaries, /*
6175 read_optimization_summary */
6176 NULL, /* stmt_fixup */
6177 0, /* function_transform_todo_flags_start */
6178 ipcp_transform_function, /* function_transform */
6179 NULL) /* variable_transform */
6182 /* opt_pass methods: */
6183 virtual bool gate (function *)
6185 /* FIXME: We should remove the optimize check after we ensure we never run
6186 IPA passes when not optimizing. */
6187 return (flag_ipa_cp && optimize) || in_lto_p;
6190 virtual unsigned int execute (function *) { return ipcp_driver (); }
6192 }; // class pass_ipa_cp
6194 } // anon namespace
6196 ipa_opt_pass_d *
6197 make_pass_ipa_cp (gcc::context *ctxt)
6199 return new pass_ipa_cp (ctxt);
6202 /* Reset all state within ipa-cp.c so that we can rerun the compiler
6203 within the same process. For use by toplev::finalize. */
6205 void
6206 ipa_cp_c_finalize (void)
6208 max_count = profile_count::uninitialized ();
6209 overall_size = 0;
6210 orig_overall_size = 0;
6211 ipcp_free_transformation_sum ();