Skip -fwhole-program when merging LTO options.
[official-gcc.git] / gcc / ipa-cp.cc
blob300bec54bbdee4a3876ef25901e34260949e1cb9
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
34 is deemed good.
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 second stage.
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
101 the second stage. */
103 #define INCLUDE_ALGORITHM
104 #include "config.h"
105 #include "system.h"
106 #include "coretypes.h"
107 #include "backend.h"
108 #include "tree.h"
109 #include "gimple-expr.h"
110 #include "gimple.h"
111 #include "predict.h"
112 #include "alloc-pool.h"
113 #include "tree-pass.h"
114 #include "cgraph.h"
115 #include "diagnostic.h"
116 #include "fold-const.h"
117 #include "gimple-iterator.h"
118 #include "gimple-fold.h"
119 #include "symbol-summary.h"
120 #include "tree-vrp.h"
121 #include "ipa-prop.h"
122 #include "tree-pretty-print.h"
123 #include "tree-inline.h"
124 #include "ipa-fnsummary.h"
125 #include "ipa-utils.h"
126 #include "tree-ssa-ccp.h"
127 #include "stringpool.h"
128 #include "attribs.h"
129 #include "dbgcnt.h"
130 #include "symtab-clones.h"
132 template <typename valtype> class ipcp_value;
/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about in
     its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two chains of self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true, if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice function.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  /* A freshly constructed lattice must still be in the TOP (undefined)
     state.  */
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};
/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
417 /* Return the param lattices structure corresponding to the Ith formal
418 parameter of the function described by INFO. */
419 static inline class ipcp_param_lattices *
420 ipa_get_parm_lattices (class ipa_node_params *info, int i)
422 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
423 gcc_checking_assert (!info->ipcp_orig_node);
424 gcc_checking_assert (info->lattices);
425 return &(info->lattices[i]);
428 /* Return the lattice corresponding to the scalar value of the Ith formal
429 parameter of the function described by INFO. */
430 static inline ipcp_lattice<tree> *
431 ipa_get_scalar_lat (class ipa_node_params *info, int i)
433 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
434 return &plats->itself;
/* Return the lattice corresponding to the polymorphic call contexts of the
   Ith formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}
446 /* Return whether LAT is a lattice with a single constant and without an
447 undefined value. */
449 template <typename valtype>
450 inline bool
451 ipcp_lattice<valtype>::is_single_const ()
453 if (bottom || contains_variable || values_count != 1)
454 return false;
455 else
456 return true;
459 /* Return true iff X and Y should be considered equal values by IPA-CP. */
461 static bool
462 values_equal_for_ipcp_p (tree x, tree y)
464 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
466 if (x == y)
467 return true;
469 if (TREE_CODE (x) == ADDR_EXPR
470 && TREE_CODE (y) == ADDR_EXPR
471 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
472 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
473 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
474 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
475 else
476 return operand_equal_p (x, y, 0);
479 /* Print V which is extracted from a value in a lattice to F. */
481 static void
482 print_ipcp_constant_value (FILE * f, tree v)
484 if (TREE_CODE (v) == ADDR_EXPR
485 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
487 fprintf (f, "& ");
488 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
490 else
491 print_generic_expr (f, v);
494 /* Print V which is extracted from a value in a lattice to F. */
496 static void
497 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
499 v.dump(f, false);
/* Print a lattice LAT to F.  If DUMP_SOURCES, also print where each value came
   from; if DUMP_BENEFITS, also print the estimated time/size benefits.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  /* Neither values nor a variable component means the lattice is still
     TOP.  */
  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      /* With benefits each value goes on its own line; otherwise values are
	 comma-separated.  */
      if (dump_benefits && prev)
	fprintf (f, " ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  /* Each source is printed as caller-order(frequency).  */
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}
/* Print the known-bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}
/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}
/* Print all ipcp_lattices of all functions to F.  DUMP_SOURCES and
   DUMP_BENEFITS are forwarded to the individual lattice printers.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, " param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, " ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, " ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, " virt_call flag set\n");

	  /* Aggregate lattices: BOTTOM short-circuits the rest.  */
	  if (plats->aggs_bottom)
	    {
	      fprintf (f, " AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, " AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  /* Aliases and thunks were already rejected above; do not report them
     again in the dump.  */
  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}
713 /* Return true if it is at all technically possible to create clones of a
714 NODE. */
716 static bool
717 ipcp_versionable_function_p (struct cgraph_node *node)
719 ipa_node_params *info = ipa_node_params_sum->get (node);
720 return info && info->versionable;
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};
/* Initialize fields of STAT to zeroes and optionally set it up so that edges
   from ITSELF (a self-recursive node) have their counts accumulated
   separately from those of other callers.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  DATA points to a caller_statistics
   structure.  Always returns false so the traversal visits all nodes.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	/* Dead callers do not contribute to the statistics.  */
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    /* Self-recursive edges go into rec_count_sum when requested via
	       stats->itself.  */
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls ++;
      }
  return false;
}
/* Return true if this NODE is viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  /* If the body is smaller than the number of call sites, cloning cannot
     grow the program.  */
  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seems cold; constant propagation can improve function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      /* Accept if direct calls account for more than 90% of the node's
	 profile count.  */
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}
/* Data required to perform a topological sort of IPA-CP values of one type
   and propagation of benefits in the resulting order.  */

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
    constants ()
  {}
};
900 /* Skip edges from and to nodes without ipa_cp enabled.
901 Ignore not available symbols. */
903 static bool
904 ignore_edge_p (cgraph_edge *e)
906 enum availability avail;
907 cgraph_node *ultimate_target
908 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
910 return (avail <= AVAIL_INTERPOSABLE
911 || !opt_for_fn (ultimate_target->decl, optimize)
912 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
915 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
917 static void
918 build_toporder_info (class ipa_topo_info *topo)
920 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
921 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
923 gcc_checking_assert (topo->stack_top == 0);
924 topo->nnodes = ipa_reduced_postorder (topo->order, true,
925 ignore_edge_p);
928 /* Free information about strongly connected components and the arrays in
929 TOPO. */
931 static void
932 free_toporder_info (class ipa_topo_info *topo)
934 ipa_free_postorder_info ();
935 free (topo->order);
936 free (topo->stack);
939 /* Add NODE to the stack in TOPO, unless it is already there. */
941 static inline void
942 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
944 ipa_node_params *info = ipa_node_params_sum->get (node);
945 if (info->node_enqueued)
946 return;
947 info->node_enqueued = 1;
948 topo->stack[topo->stack_top++] = node;
951 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
952 is empty. */
954 static struct cgraph_node *
955 pop_node_from_stack (class ipa_topo_info *topo)
957 if (topo->stack_top)
959 struct cgraph_node *node;
960 topo->stack_top--;
961 node = topo->stack[topo->stack_top];
962 ipa_node_params_sum->get (node)->node_enqueued = 0;
963 return node;
965 else
966 return NULL;
969 /* Set lattice LAT to bottom and return true if it previously was not set as
970 such. */
972 template <typename valtype>
973 inline bool
974 ipcp_lattice<valtype>::set_to_bottom ()
976 bool ret = !bottom;
977 bottom = true;
978 return ret;
981 /* Mark lattice as containing an unknown value and return true if it previously
982 was not marked as such. */
984 template <typename valtype>
985 inline bool
986 ipcp_lattice<valtype>::set_contains_variable ()
988 bool ret = !contains_variable;
989 contains_variable = true;
990 return ret;
993 /* Set all aggregate lattices in PLATS to bottom and return true if they were
994 not previously set as such. */
996 static inline bool
997 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
999 bool ret = !plats->aggs_bottom;
1000 plats->aggs_bottom = true;
1001 return ret;
1004 /* Mark all aggregate lattices in PLATS as containing an unknown value and
1005 return true if they were not previously marked as such. */
1007 static inline bool
1008 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
1010 bool ret = !plats->aggs_contain_variable;
1011 plats->aggs_contain_variable = true;
1012 return ret;
1015 bool
1016 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
1018 return meet_with_1 (&other.m_vr);
1021 /* Meet the current value of the lattice with value range described by VR
1022 lattice. */
1024 bool
1025 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1027 return meet_with_1 (p_vr);
1030 /* Meet the current value of the lattice with value range described by
1031 OTHER_VR lattice. Return TRUE if anything changed. */
1033 bool
1034 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1036 if (bottom_p ())
1037 return false;
1039 if (other_vr->varying_p ())
1040 return set_to_bottom ();
1042 value_range save (m_vr);
1043 m_vr.union_ (*other_vr);
1044 return m_vr != save;
1047 /* Return true if value range information in the lattice is yet unknown. */
1049 bool
1050 ipcp_vr_lattice::top_p () const
1052 return m_vr.undefined_p ();
1055 /* Return true if value range information in the lattice is known to be
1056 unusable. */
1058 bool
1059 ipcp_vr_lattice::bottom_p () const
1061 return m_vr.varying_p ();
1064 /* Set value range information in the lattice to bottom. Return true if it
1065 previously was in a different state. */
1067 bool
1068 ipcp_vr_lattice::set_to_bottom ()
1070 if (m_vr.varying_p ())
1071 return false;
1072 /* ?? We create all sorts of VARYING ranges for floats, structures,
1073 and other types which we cannot handle as ranges. We should
1074 probably avoid handling them throughout the pass, but it's easier
1075 to create a sensible VARYING here and let the lattice
1076 propagate. */
1077 m_vr.set_varying (integer_type_node);
1078 return true;
1081 /* Set lattice value to bottom, if it already isn't the case. */
1083 bool
1084 ipcp_bits_lattice::set_to_bottom ()
1086 if (bottom_p ())
1087 return false;
1088 m_lattice_val = IPA_BITS_VARYING;
1089 m_value = 0;
1090 m_mask = -1;
1091 return true;
1094 /* Set to constant if it isn't already. Only meant to be called
1095 when switching state from TOP. */
1097 bool
1098 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1100 gcc_assert (top_p ());
1101 m_lattice_val = IPA_BITS_CONSTANT;
1102 m_value = wi::bit_and (wi::bit_not (mask), value);
1103 m_mask = mask;
1104 return true;
1107 /* Return true if any of the known bits are non-zero. */
1109 bool
1110 ipcp_bits_lattice::known_nonzero_p () const
1112 if (!constant_p ())
1113 return false;
1114 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
1117 /* Convert operand to value, mask form. */
1119 void
1120 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1122 wide_int get_nonzero_bits (const_tree);
1124 if (TREE_CODE (operand) == INTEGER_CST)
1126 *valuep = wi::to_widest (operand);
1127 *maskp = 0;
1129 else
1131 *valuep = 0;
1132 *maskp = -1;
1136 /* Meet operation, similar to ccp_lattice_meet, we xor values
1137 if this->value, value have different values at same bit positions, we want
1138 to drop that bit to varying. Return true if mask is changed.
1139 This function assumes that the lattice value is in CONSTANT state. If
1140 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1142 bool
1143 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1144 unsigned precision, bool drop_all_ones)
1146 gcc_assert (constant_p ());
1148 widest_int old_mask = m_mask;
1149 m_mask = (m_mask | mask) | (m_value ^ value);
1150 if (drop_all_ones)
1151 m_mask |= m_value;
1152 m_value &= ~m_mask;
1154 if (wi::sext (m_mask, precision) == -1)
1155 return set_to_bottom ();
1157 return m_mask != old_mask;
1160 /* Meet the bits lattice with operand
1161 described by <value, mask, sgn, precision. */
1163 bool
1164 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1165 unsigned precision)
1167 if (bottom_p ())
1168 return false;
1170 if (top_p ())
1172 if (wi::sext (mask, precision) == -1)
1173 return set_to_bottom ();
1174 return set_to_constant (value, mask);
1177 return meet_with_1 (value, mask, precision, false);
1180 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1181 if code is binary operation or bit_value_unop (other) if code is unary op.
1182 In the case when code is nop_expr, no adjustment is required. If
1183 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1185 bool
1186 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1187 signop sgn, enum tree_code code, tree operand,
1188 bool drop_all_ones)
1190 if (other.bottom_p ())
1191 return set_to_bottom ();
1193 if (bottom_p () || other.top_p ())
1194 return false;
1196 widest_int adjusted_value, adjusted_mask;
1198 if (TREE_CODE_CLASS (code) == tcc_binary)
1200 tree type = TREE_TYPE (operand);
1201 widest_int o_value, o_mask;
1202 get_value_and_mask (operand, &o_value, &o_mask);
1204 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1205 sgn, precision, other.get_value (), other.get_mask (),
1206 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1208 if (wi::sext (adjusted_mask, precision) == -1)
1209 return set_to_bottom ();
1212 else if (TREE_CODE_CLASS (code) == tcc_unary)
1214 bit_value_unop (code, sgn, precision, &adjusted_value,
1215 &adjusted_mask, sgn, precision, other.get_value (),
1216 other.get_mask ());
1218 if (wi::sext (adjusted_mask, precision) == -1)
1219 return set_to_bottom ();
1222 else
1223 return set_to_bottom ();
1225 if (top_p ())
1227 if (drop_all_ones)
1229 adjusted_mask |= adjusted_value;
1230 adjusted_value &= ~adjusted_mask;
1232 if (wi::sext (adjusted_mask, precision) == -1)
1233 return set_to_bottom ();
1234 return set_to_constant (adjusted_value, adjusted_mask);
1236 else
1237 return meet_with_1 (adjusted_value, adjusted_mask, precision,
1238 drop_all_ones);
1241 /* Dump the contents of the list to FILE. */
1243 void
1244 ipa_argagg_value_list::dump (FILE *f)
1246 bool comma = false;
1247 for (const ipa_argagg_value &av : m_elts)
1249 fprintf (f, "%s %i[%u]=", comma ? "," : "",
1250 av.index, av.unit_offset);
1251 print_generic_expr (f, av.value);
1252 if (av.by_ref)
1253 fprintf (f, "(by_ref)");
1254 comma = true;
1256 fprintf (f, "\n");
1259 /* Dump the contents of the list to stderr. */
1261 void
1262 ipa_argagg_value_list::debug ()
1264 dump (stderr);
1267 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1268 NULL if there is no such constant. */
1270 const ipa_argagg_value *
1271 ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1273 ipa_argagg_value key;
1274 key.index = index;
1275 key.unit_offset = unit_offset;
1276 const ipa_argagg_value *res
1277 = std::lower_bound (m_elts.begin (), m_elts.end (), key,
1278 [] (const ipa_argagg_value &elt,
1279 const ipa_argagg_value &val)
1281 if (elt.index < val.index)
1282 return true;
1283 if (elt.index > val.index)
1284 return false;
1285 if (elt.unit_offset < val.unit_offset)
1286 return true;
1287 return false;
1290 if (res == m_elts.end ()
1291 || res->index != index
1292 || res->unit_offset != unit_offset)
1293 res = nullptr;
1295 /* TODO: perhaps remove the check (that the underlying array is indeed
1296 sorted) if it turns out it can be too slow? */
1297 if (!flag_checking)
1298 return res;
1300 const ipa_argagg_value *slow_res = NULL;
1301 int prev_index = -1;
1302 unsigned prev_unit_offset = 0;
1303 for (const ipa_argagg_value &av : m_elts)
1305 gcc_assert (prev_index < 0
1306 || prev_index < av.index
1307 || prev_unit_offset < av.unit_offset);
1308 prev_index = av.index;
1309 prev_unit_offset = av.unit_offset;
1310 if (av.index == index
1311 && av.unit_offset == unit_offset)
1312 slow_res = &av;
1314 gcc_assert (res == slow_res);
1316 return res;
1319 /* Return the first item describing a constant stored for parameter with INDEX,
1320 regardless of offset or reference, or NULL if there is no such constant. */
1322 const ipa_argagg_value *
1323 ipa_argagg_value_list::get_elt_for_index (int index) const
1325 const ipa_argagg_value *res
1326 = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1327 [] (const ipa_argagg_value &elt, unsigned idx)
1329 return elt.index < idx;
1331 if (res == m_elts.end ()
1332 || res->index != index)
1333 res = nullptr;
1334 return res;
1337 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1338 performing any check of whether value is passed by reference, or NULL_TREE
1339 if there is no such constant. */
1341 tree
1342 ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1344 const ipa_argagg_value *av = get_elt (index, unit_offset);
1345 return av ? av->value : NULL_TREE;
1348 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1349 passed by reference or not according to BY_REF, or NULL_TREE if there is
1350 no such constant. */
1352 tree
1353 ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1354 bool by_ref) const
1356 const ipa_argagg_value *av = get_elt (index, unit_offset);
1357 if (av && av->by_ref == by_ref)
1358 return av->value;
1359 return NULL_TREE;
1362 /* Return true if all elements present in OTHER are also present in this
1363 list. */
1365 bool
1366 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1368 unsigned j = 0;
1369 for (unsigned i = 0; i < other.m_elts.size (); i++)
1371 unsigned other_index = other.m_elts[i].index;
1372 unsigned other_offset = other.m_elts[i].unit_offset;
1374 while (j < m_elts.size ()
1375 && (m_elts[j].index < other_index
1376 || (m_elts[j].index == other_index
1377 && m_elts[j].unit_offset < other_offset)))
1378 j++;
1380 if (j >= m_elts.size ()
1381 || m_elts[j].index != other_index
1382 || m_elts[j].unit_offset != other_offset
1383 || m_elts[j].by_ref != other.m_elts[i].by_ref
1384 || !m_elts[j].value
1385 || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
1386 return false;
1388 return true;
1391 /* Push all items in this list that describe parameter SRC_INDEX into RES as
1392 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1393 offsets but skip those which would end up with a negative offset. */
1395 void
1396 ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1397 unsigned dest_index,
1398 unsigned unit_delta,
1399 vec<ipa_argagg_value> *res) const
1401 const ipa_argagg_value *av = get_elt_for_index (src_index);
1402 if (!av)
1403 return;
1404 unsigned prev_unit_offset = 0;
1405 bool first = true;
1406 for (; av < m_elts.end (); ++av)
1408 if (av->index > src_index)
1409 return;
1410 if (av->index == src_index
1411 && (av->unit_offset >= unit_delta)
1412 && av->value)
1414 ipa_argagg_value new_av;
1415 gcc_checking_assert (av->value);
1416 new_av.value = av->value;
1417 new_av.unit_offset = av->unit_offset - unit_delta;
1418 new_av.index = dest_index;
1419 new_av.by_ref = av->by_ref;
1421 /* Quick check that the offsets we push are indeed increasing. */
1422 gcc_assert (first
1423 || new_av.unit_offset > prev_unit_offset);
1424 prev_unit_offset = new_av.unit_offset;
1425 first = false;
1427 res->safe_push (new_av);
1432 /* Push to RES information about single lattices describing aggregate values in
1433 PLATS as those describing parameter DEST_INDEX and the original offset minus
1434 UNIT_DELTA. Return true if any item has been pushed to RES. */
1436 static bool
1437 push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1438 unsigned unit_delta,
1439 vec<ipa_argagg_value> *res)
1441 if (plats->aggs_contain_variable)
1442 return false;
1444 bool pushed_sth = false;
1445 bool first = true;
1446 unsigned prev_unit_offset = 0;
1447 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1448 if (aglat->is_single_const ()
1449 && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1451 ipa_argagg_value iav;
1452 iav.value = aglat->values->value;
1453 iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1454 iav.index = dest_index;
1455 iav.by_ref = plats->aggs_by_ref;
1457 gcc_assert (first
1458 || iav.unit_offset > prev_unit_offset);
1459 prev_unit_offset = iav.unit_offset;
1460 first = false;
1462 pushed_sth = true;
1463 res->safe_push (iav);
1465 return pushed_sth;
1468 /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1469 Return the number of remaining valid entries. */
1471 static unsigned
1472 intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1473 const vec<ipa_argagg_value> &other)
1475 unsigned valid_entries = 0;
1476 unsigned j = 0;
1477 for (unsigned i = 0; i < elts.length (); i++)
1479 if (!elts[i].value)
1480 continue;
1482 unsigned this_index = elts[i].index;
1483 unsigned this_offset = elts[i].unit_offset;
1485 while (j < other.length ()
1486 && (other[j].index < this_index
1487 || (other[j].index == this_index
1488 && other[j].unit_offset < this_offset)))
1489 j++;
1491 if (j >= other.length ())
1493 elts[i].value = NULL_TREE;
1494 continue;
1497 if (other[j].index == this_index
1498 && other[j].unit_offset == this_offset
1499 && other[j].by_ref == elts[i].by_ref
1500 && other[j].value
1501 && values_equal_for_ipcp_p (other[j].value, elts[i].value))
1502 valid_entries++;
1503 else
1504 elts[i].value = NULL_TREE;
1506 return valid_entries;
1509 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1510 return true is any of them has not been marked as such so far. */
1512 static inline bool
1513 set_all_contains_variable (class ipcp_param_lattices *plats)
1515 bool ret;
1516 ret = plats->itself.set_contains_variable ();
1517 ret |= plats->ctxlat.set_contains_variable ();
1518 ret |= set_agg_lats_contain_variable (plats);
1519 ret |= plats->bits_lattice.set_to_bottom ();
1520 ret |= plats->m_value_range.set_to_bottom ();
1521 return ret;
1524 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1525 points to by the number of callers to NODE. */
1527 static bool
1528 count_callers (cgraph_node *node, void *data)
1530 int *caller_count = (int *) data;
1532 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1533 /* Local thunks can be handled transparently, but if the thunk cannot
1534 be optimized out, count it as a real use. */
1535 if (!cs->caller->thunk || !cs->caller->local)
1536 ++*caller_count;
1537 return false;
1540 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1541 the one caller of some other node. Set the caller's corresponding flag. */
1543 static bool
1544 set_single_call_flag (cgraph_node *node, void *)
1546 cgraph_edge *cs = node->callers;
1547 /* Local thunks can be handled transparently, skip them. */
1548 while (cs && cs->caller->thunk && cs->caller->local)
1549 cs = cs->next_caller;
1550 if (cs)
1551 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1553 info->node_calling_single_call = true;
1554 return true;
1556 return false;
1559 /* Initialize ipcp_lattices. */
1561 static void
1562 initialize_node_lattices (struct cgraph_node *node)
1564 ipa_node_params *info = ipa_node_params_sum->get (node);
1565 struct cgraph_edge *ie;
1566 bool disable = false, variable = false;
1567 int i;
1569 gcc_checking_assert (node->has_gimple_body_p ());
1571 if (!ipa_get_param_count (info))
1572 disable = true;
1573 else if (node->local)
1575 int caller_count = 0;
1576 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1577 true);
1578 gcc_checking_assert (caller_count > 0);
1579 if (caller_count == 1)
1580 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1581 NULL, true);
1583 else
1585 /* When cloning is allowed, we can assume that externally visible
1586 functions are not called. We will compensate this by cloning
1587 later. */
1588 if (ipcp_versionable_function_p (node)
1589 && ipcp_cloning_candidate_p (node))
1590 variable = true;
1591 else
1592 disable = true;
1595 if (dump_file && (dump_flags & TDF_DETAILS)
1596 && !node->alias && !node->thunk)
1598 fprintf (dump_file, "Initializing lattices of %s\n",
1599 node->dump_name ());
1600 if (disable || variable)
1601 fprintf (dump_file, " Marking all lattices as %s\n",
1602 disable ? "BOTTOM" : "VARIABLE");
1605 auto_vec<bool, 16> surviving_params;
1606 bool pre_modified = false;
1608 clone_info *cinfo = clone_info::get (node);
1610 if (!disable && cinfo && cinfo->param_adjustments)
1612 /* At the moment all IPA optimizations should use the number of
1613 parameters of the prevailing decl as the m_always_copy_start.
1614 Handling any other value would complicate the code below, so for the
1615 time bing let's only assert it is so. */
1616 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1617 == ipa_get_param_count (info))
1618 || cinfo->param_adjustments->m_always_copy_start < 0);
1620 pre_modified = true;
1621 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1623 if (dump_file && (dump_flags & TDF_DETAILS)
1624 && !node->alias && !node->thunk)
1626 bool first = true;
1627 for (int j = 0; j < ipa_get_param_count (info); j++)
1629 if (j < (int) surviving_params.length ()
1630 && surviving_params[j])
1631 continue;
1632 if (first)
1634 fprintf (dump_file,
1635 " The following parameters are dead on arrival:");
1636 first = false;
1638 fprintf (dump_file, " %u", j);
1640 if (!first)
1641 fprintf (dump_file, "\n");
1645 for (i = 0; i < ipa_get_param_count (info); i++)
1647 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1648 if (disable
1649 || !ipa_get_type (info, i)
1650 || (pre_modified && (surviving_params.length () <= (unsigned) i
1651 || !surviving_params[i])))
1653 plats->itself.set_to_bottom ();
1654 plats->ctxlat.set_to_bottom ();
1655 set_agg_lats_to_bottom (plats);
1656 plats->bits_lattice.set_to_bottom ();
1657 plats->m_value_range.m_vr = value_range ();
1658 plats->m_value_range.set_to_bottom ();
1660 else
1662 plats->m_value_range.init ();
1663 if (variable)
1664 set_all_contains_variable (plats);
1668 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1669 if (ie->indirect_info->polymorphic
1670 && ie->indirect_info->param_index >= 0)
1672 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1673 ipa_get_parm_lattices (info,
1674 ie->indirect_info->param_index)->virt_call = 1;
1678 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1679 PARAM_TYPE. */
1681 static bool
1682 ipacp_value_safe_for_type (tree param_type, tree value)
1684 tree val_type = TREE_TYPE (value);
1685 if (param_type == val_type
1686 || useless_type_conversion_p (param_type, val_type)
1687 || fold_convertible_p (param_type, value))
1688 return true;
1689 else
1690 return false;
1693 /* Return the result of a (possibly arithmetic) operation on the constant
1694 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1695 the type of the parameter to which the result is passed. Return
1696 NULL_TREE if that cannot be determined or be considered an
1697 interprocedural invariant. */
1699 static tree
1700 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1701 tree res_type)
1703 tree res;
1705 if (opcode == NOP_EXPR)
1706 return input;
1707 if (!is_gimple_ip_invariant (input))
1708 return NULL_TREE;
1710 if (opcode == ASSERT_EXPR)
1712 if (values_equal_for_ipcp_p (input, operand))
1713 return input;
1714 else
1715 return NULL_TREE;
1718 if (!res_type)
1720 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1721 res_type = boolean_type_node;
1722 else if (expr_type_first_operand_type_p (opcode))
1723 res_type = TREE_TYPE (input);
1724 else
1725 return NULL_TREE;
1728 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1729 res = fold_unary (opcode, res_type, input);
1730 else
1731 res = fold_binary (opcode, res_type, input, operand);
1733 if (res && !is_gimple_ip_invariant (res))
1734 return NULL_TREE;
1736 return res;
1739 /* Return the result of a (possibly arithmetic) pass through jump function
1740 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1741 to which the result is passed. Return NULL_TREE if that cannot be
1742 determined or be considered an interprocedural invariant. */
1744 static tree
1745 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1746 tree res_type)
1748 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1749 input,
1750 ipa_get_jf_pass_through_operand (jfunc),
1751 res_type);
1754 /* Return the result of an ancestor jump function JFUNC on the constant value
1755 INPUT. Return NULL_TREE if that cannot be determined. */
1757 static tree
1758 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1760 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1761 if (TREE_CODE (input) == ADDR_EXPR)
1763 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1764 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1765 if (known_eq (off, 0))
1766 return input;
1767 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1768 return build1 (ADDR_EXPR, TREE_TYPE (input),
1769 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1770 build_int_cst (ptr_type_node, byte_offset)));
1772 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1773 && zerop (input))
1774 return input;
1775 else
1776 return NULL_TREE;
1779 /* Determine whether JFUNC evaluates to a single known constant value and if
1780 so, return it. Otherwise return NULL. INFO describes the caller node or
1781 the one it is inlined to, so that pass-through jump functions can be
1782 evaluated. PARM_TYPE is the type of the parameter to which the result is
1783 passed. */
1785 tree
1786 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1787 tree parm_type)
1789 if (jfunc->type == IPA_JF_CONST)
1790 return ipa_get_jf_constant (jfunc);
1791 else if (jfunc->type == IPA_JF_PASS_THROUGH
1792 || jfunc->type == IPA_JF_ANCESTOR)
1794 tree input;
1795 int idx;
1797 if (jfunc->type == IPA_JF_PASS_THROUGH)
1798 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1799 else
1800 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1802 if (info->ipcp_orig_node)
1803 input = info->known_csts[idx];
1804 else
1806 ipcp_lattice<tree> *lat;
1808 if (!info->lattices
1809 || idx >= ipa_get_param_count (info))
1810 return NULL_TREE;
1811 lat = ipa_get_scalar_lat (info, idx);
1812 if (!lat->is_single_const ())
1813 return NULL_TREE;
1814 input = lat->values->value;
1817 if (!input)
1818 return NULL_TREE;
1820 if (jfunc->type == IPA_JF_PASS_THROUGH)
1821 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1822 else
1823 return ipa_get_jf_ancestor_result (jfunc, input);
1825 else
1826 return NULL_TREE;
1829 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1830 that INFO describes the caller node or the one it is inlined to, CS is the
1831 call graph edge corresponding to JFUNC and CSIDX index of the described
1832 parameter. */
1834 ipa_polymorphic_call_context
1835 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1836 ipa_jump_func *jfunc)
1838 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1839 ipa_polymorphic_call_context ctx;
1840 ipa_polymorphic_call_context *edge_ctx
1841 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1843 if (edge_ctx && !edge_ctx->useless_p ())
1844 ctx = *edge_ctx;
1846 if (jfunc->type == IPA_JF_PASS_THROUGH
1847 || jfunc->type == IPA_JF_ANCESTOR)
1849 ipa_polymorphic_call_context srcctx;
1850 int srcidx;
1851 bool type_preserved = true;
1852 if (jfunc->type == IPA_JF_PASS_THROUGH)
1854 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1855 return ctx;
1856 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1857 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1859 else
1861 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1862 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1864 if (info->ipcp_orig_node)
1866 if (info->known_contexts.exists ())
1867 srcctx = info->known_contexts[srcidx];
1869 else
1871 if (!info->lattices
1872 || srcidx >= ipa_get_param_count (info))
1873 return ctx;
1874 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1875 lat = ipa_get_poly_ctx_lat (info, srcidx);
1876 if (!lat->is_single_const ())
1877 return ctx;
1878 srcctx = lat->values->value;
1880 if (srcctx.useless_p ())
1881 return ctx;
1882 if (jfunc->type == IPA_JF_ANCESTOR)
1883 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1884 if (!type_preserved)
1885 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1886 srcctx.combine_with (ctx);
1887 return srcctx;
1890 return ctx;
1893 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1894 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1895 the result is a range or an anti-range. */
1897 static bool
1898 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1899 value_range *src_vr,
1900 enum tree_code operation,
1901 tree dst_type, tree src_type)
1903 range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1904 if (dst_vr->varying_p () || dst_vr->undefined_p ())
1905 return false;
1906 return true;
1909 /* Determine value_range of JFUNC given that INFO describes the caller node or
1910 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1911 and PARM_TYPE of the parameter. */
1913 value_range
1914 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1915 ipa_jump_func *jfunc, tree parm_type)
1917 value_range vr;
1918 if (jfunc->m_vr)
1919 ipa_vr_operation_and_type_effects (&vr,
1920 jfunc->m_vr,
1921 NOP_EXPR, parm_type,
1922 jfunc->m_vr->type ());
1923 if (vr.singleton_p ())
1924 return vr;
1925 if (jfunc->type == IPA_JF_PASS_THROUGH)
1927 int idx;
1928 ipcp_transformation *sum
1929 = ipcp_get_transformation_summary (cs->caller->inlined_to
1930 ? cs->caller->inlined_to
1931 : cs->caller);
1932 if (!sum || !sum->m_vr)
1933 return vr;
1935 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1937 if (!(*sum->m_vr)[idx].known)
1938 return vr;
1939 tree vr_type = ipa_get_type (info, idx);
1940 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1941 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1942 (*sum->m_vr)[idx].type);
1944 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1946 if (TREE_CODE_CLASS (operation) == tcc_unary)
1948 value_range res;
1950 if (ipa_vr_operation_and_type_effects (&res,
1951 &srcvr,
1952 operation, parm_type,
1953 vr_type))
1954 vr.intersect (res);
1956 else
1958 value_range op_res, res;
1959 tree op = ipa_get_jf_pass_through_operand (jfunc);
1960 value_range op_vr (op, op);
1962 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1963 if (ipa_vr_operation_and_type_effects (&res,
1964 &op_res,
1965 NOP_EXPR, parm_type,
1966 vr_type))
1967 vr.intersect (res);
1970 return vr;
1973 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1974 single known constant value and if so, return it. Otherwise return NULL.
1975 NODE and INFO describes the caller node or the one it is inlined to, and
1976 its related info. */
1978 tree
1979 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1980 const ipa_agg_jf_item *item)
1982 tree value = NULL_TREE;
1983 int src_idx;
1985 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1986 return NULL_TREE;
1988 if (item->jftype == IPA_JF_CONST)
1989 return item->value.constant;
1991 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1992 || item->jftype == IPA_JF_LOAD_AGG);
1994 src_idx = item->value.pass_through.formal_id;
1996 if (info->ipcp_orig_node)
1998 if (item->jftype == IPA_JF_PASS_THROUGH)
1999 value = info->known_csts[src_idx];
2000 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
2002 ipa_argagg_value_list avl (ts);
2003 value = avl.get_value (src_idx,
2004 item->value.load_agg.offset / BITS_PER_UNIT,
2005 item->value.load_agg.by_ref);
2008 else if (info->lattices)
2010 class ipcp_param_lattices *src_plats
2011 = ipa_get_parm_lattices (info, src_idx);
2013 if (item->jftype == IPA_JF_PASS_THROUGH)
2015 struct ipcp_lattice<tree> *lat = &src_plats->itself;
2017 if (!lat->is_single_const ())
2018 return NULL_TREE;
2020 value = lat->values->value;
2022 else if (src_plats->aggs
2023 && !src_plats->aggs_bottom
2024 && !src_plats->aggs_contain_variable
2025 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
2027 struct ipcp_agg_lattice *aglat;
2029 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
2031 if (aglat->offset > item->value.load_agg.offset)
2032 break;
2034 if (aglat->offset == item->value.load_agg.offset)
2036 if (aglat->is_single_const ())
2037 value = aglat->values->value;
2038 break;
2044 if (!value)
2045 return NULL_TREE;
2047 if (item->jftype == IPA_JF_LOAD_AGG)
2049 tree load_type = item->value.load_agg.type;
2050 tree value_type = TREE_TYPE (value);
2052 /* Ensure value type is compatible with load type. */
2053 if (!useless_type_conversion_p (load_type, value_type))
2054 return NULL_TREE;
2057 return ipa_get_jf_arith_result (item->value.pass_through.operation,
2058 value,
2059 item->value.pass_through.operand,
2060 item->type);
2063 /* Process all items in AGG_JFUNC relative to caller (or the node the original
2064 caller is inlined to) NODE which described by INFO and push the results to
2065 RES as describing values passed in parameter DST_INDEX. */
2067 void
2068 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
2069 ipa_agg_jump_function *agg_jfunc,
2070 unsigned dst_index,
2071 vec<ipa_argagg_value> *res)
2073 unsigned prev_unit_offset = 0;
2074 bool first = true;
2076 for (const ipa_agg_jf_item &item : agg_jfunc->items)
2078 tree value = ipa_agg_value_from_jfunc (info, node, &item);
2079 if (!value)
2080 continue;
2082 ipa_argagg_value iav;
2083 iav.value = value;
2084 iav.unit_offset = item.offset / BITS_PER_UNIT;
2085 iav.index = dst_index;
2086 iav.by_ref = agg_jfunc->by_ref;
2088 gcc_assert (first
2089 || iav.unit_offset > prev_unit_offset);
2090 prev_unit_offset = iav.unit_offset;
2091 first = false;
2093 res->safe_push (iav);
2097 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
2098 bottom, not containing a variable component and without any known value at
2099 the same time. */
2101 DEBUG_FUNCTION void
2102 ipcp_verify_propagated_values (void)
2104 struct cgraph_node *node;
2106 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2108 ipa_node_params *info = ipa_node_params_sum->get (node);
2109 if (!opt_for_fn (node->decl, flag_ipa_cp)
2110 || !opt_for_fn (node->decl, optimize))
2111 continue;
2112 int i, count = ipa_get_param_count (info);
2114 for (i = 0; i < count; i++)
2116 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
2118 if (!lat->bottom
2119 && !lat->contains_variable
2120 && lat->values_count == 0)
2122 if (dump_file)
2124 symtab->dump (dump_file);
2125 fprintf (dump_file, "\nIPA lattices after constant "
2126 "propagation, before gcc_unreachable:\n");
2127 print_all_lattices (dump_file, true, false);
2130 gcc_unreachable ();
2136 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2138 static bool
2139 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2140 ipa_polymorphic_call_context y)
2142 return x.equal_to (y);
2146 /* Add a new value source to the value represented by THIS, marking that a
2147 value comes from edge CS and (if the underlying jump function is a
2148 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
2149 parameter described by SRC_INDEX. OFFSET is negative if the source was the
2150 scalar value of the parameter itself or the offset within an aggregate. */
2152 template <typename valtype>
2153 void
2154 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2155 int src_idx, HOST_WIDE_INT offset)
2157 ipcp_value_source<valtype> *src;
2159 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2160 src->offset = offset;
2161 src->cs = cs;
2162 src->val = src_val;
2163 src->index = src_idx;
2165 src->next = sources;
2166 sources = src;
2169 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
2170 SOURCE and clear all other fields. */
2172 static ipcp_value<tree> *
2173 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2175 ipcp_value<tree> *val;
2177 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2178 val->value = cst;
2179 val->self_recursion_generated_level = same_lat_gen_level;
2180 return val;
2183 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
2184 value to SOURCE and clear all other fields. */
2186 static ipcp_value<ipa_polymorphic_call_context> *
2187 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2188 unsigned same_lat_gen_level)
2190 ipcp_value<ipa_polymorphic_call_context> *val;
2192 val = new (ipcp_poly_ctx_values_pool.allocate ())
2193 ipcp_value<ipa_polymorphic_call_context>();
2194 val->value = ctx;
2195 val->self_recursion_generated_level = same_lat_gen_level;
2196 return val;
2199 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
2200 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
2201 meaning. OFFSET -1 means the source is scalar and not a part of an
2202 aggregate. If non-NULL, VAL_P records address of existing or newly added
2203 ipcp_value.
2205 If the value is generated for a self-recursive call as a result of an
2206 arithmetic pass-through jump-function acting on a value in the same lattice,
2207 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2208 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
2210 template <typename valtype>
2211 bool
2212 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2213 ipcp_value<valtype> *src_val,
2214 int src_idx, HOST_WIDE_INT offset,
2215 ipcp_value<valtype> **val_p,
2216 unsigned same_lat_gen_level)
2218 ipcp_value<valtype> *val, *last_val = NULL;
2220 if (val_p)
2221 *val_p = NULL;
/* A bottom lattice already represents "anything"; adding a value cannot
   change it.  */
2223 if (bottom)
2224 return false;
/* First check whether NEWVAL is already known; equality is the
   lattice-specific values_equal_for_ipcp_p.  */
2226 for (val = values; val; last_val = val, val = val->next)
2227 if (values_equal_for_ipcp_p (val->value, newval))
2229 if (val_p)
2230 *val_p = val;
/* Keep the maximum self-recursion generation level seen for this
   value.  */
2232 if (val->self_recursion_generated_level < same_lat_gen_level)
2233 val->self_recursion_generated_level = same_lat_gen_level;
/* Within an SCC propagation iterates; avoid accumulating a duplicate
   (CS, SRC_VAL) source on every iteration.  */
2235 if (ipa_edge_within_scc (cs))
2237 ipcp_value_source<valtype> *s;
2238 for (s = val->sources; s; s = s->next)
2239 if (s->cs == cs && s->val == src_val)
2240 break;
2241 if (s)
2242 return false;
2245 val->add_source (cs, src_val, src_idx, offset);
2246 return false;
/* NEWVAL is genuinely new.  If the per-lattice value limit has been
   reached (and this is not a privileged self-recursive value), drop the
   whole lattice to bottom instead of growing further.  */
2249 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
2250 param_ipa_cp_value_list_size))
2252 /* We can only free sources, not the values themselves, because sources
2253 of other values in this SCC might point to them. */
2254 for (val = values; val; val = val->next)
2256 while (val->sources)
2258 ipcp_value_source<valtype> *src = val->sources;
2259 val->sources = src->next;
2260 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2263 values = NULL;
2264 return set_to_bottom ();
2267 values_count++;
2268 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2269 val->add_source (cs, src_val, src_idx, offset);
2270 val->next = NULL;
2272 /* Add the new value to end of value list, which can reduce iterations
2273 of propagation stage for recursive function. */
2274 if (last_val)
2275 last_val->next = val;
2276 else
2277 values = val;
2279 if (val_p)
2280 *val_p = val;
2282 return true;
2285 /* A helper function that returns result of operation specified by OPCODE on
2286 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2287 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2288 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2289 the result. */
2291 static tree
2292 get_val_across_arith_op (enum tree_code opcode,
2293 tree opnd1_type,
2294 tree opnd2,
2295 ipcp_value<tree> *src_val,
2296 tree res_type)
2298 tree opnd1 = src_val->value;
2300 /* Skip source values that is incompatible with specified type. */
2301 if (opnd1_type
2302 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2303 return NULL_TREE;
2305 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2308 /* Propagate values through an arithmetic transformation described by a jump
2309 function associated with edge CS, taking values from SRC_LAT and putting
2310 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2311 OPND2 is a constant value if transformation is a binary operation.
2312 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2313 a part of the aggregate. SRC_IDX is the index of the source parameter.
2314 RES_TYPE is the value type of result being propagated into. Return true if
2315 DEST_LAT changed. */
2317 static bool
2318 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2319 enum tree_code opcode,
2320 tree opnd1_type,
2321 tree opnd2,
2322 ipcp_lattice<tree> *src_lat,
2323 ipcp_lattice<tree> *dest_lat,
2324 HOST_WIDE_INT src_offset,
2325 int src_idx,
2326 tree res_type)
2328 ipcp_value<tree> *src_val;
2329 bool ret = false;
2331 /* Due to circular dependencies, propagating within an SCC through arithmetic
2332 transformation would create infinite number of values. But for
2333 self-feeding recursive function, we could allow propagation in a limited
2334 count, and this can enable a simple kind of recursive function versioning.
2335 For other scenario, we would just make lattices bottom. */
2336 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2338 int i;
2340 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2341 param_ipa_cp_max_recursive_depth)
2342 if (src_lat != dest_lat || max_recursive_depth < 1)
2343 return dest_lat->set_contains_variable ();
2345 /* No benefit if recursive execution is in low probability. */
2346 if (cs->sreal_frequency () * 100
2347 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2348 param_ipa_cp_min_recursive_probability))
2349 return dest_lat->set_contains_variable ();
2351 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2353 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2355 /* Now we do not use self-recursively generated value as propagation
2356 source, this is absolutely conservative, but could avoid explosion
2357 of lattice's value space, especially when one recursive function
2358 calls another recursive. */
2359 if (src_val->self_recursion_generated_p ())
2361 ipcp_value_source<tree> *s;
2363 /* If the lattice has already been propagated for the call site,
2364 no need to do that again. */
2365 for (s = src_val->sources; s; s = s->next)
2366 if (s->cs == cs)
2367 return dest_lat->set_contains_variable ();
2369 else
2370 val_seeds.safe_push (src_val);
2373 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2375 /* Recursively generate lattice values with a limited count. */
2376 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2378 for (int j = 1; j < max_recursive_depth; j++)
2380 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2381 src_val, res_type);
2382 if (!cstval
2383 || !ipacp_value_safe_for_type (res_type, cstval))
2384 break;
/* add_value updates SRC_VAL to point at the value just added, so each
   iteration feeds on the previous iteration's result, forming a chain of
   length up to max_recursive_depth.  */
2386 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2387 src_offset, &src_val, j);
2388 gcc_checking_assert (src_val);
2391 ret |= dest_lat->set_contains_variable ();
2393 else
2394 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2396 /* Now we do not use self-recursively generated value as propagation
2397 source, otherwise it is easy to make value space of normal lattice
2398 overflow. */
2399 if (src_val->self_recursion_generated_p ())
2401 ret |= dest_lat->set_contains_variable ();
2402 continue;
2405 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2406 src_val, res_type);
2407 if (cstval
2408 && ipacp_value_safe_for_type (res_type, cstval))
2409 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2410 src_offset);
2411 else
2412 ret |= dest_lat->set_contains_variable ();
2415 return ret;
2418 /* Propagate values through a pass-through jump function JFUNC associated with
2419 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2420 is the index of the source parameter. PARM_TYPE is the type of the
2421 parameter to which the result is passed. */
2423 static bool
2424 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2425 ipcp_lattice<tree> *src_lat,
2426 ipcp_lattice<tree> *dest_lat, int src_idx,
2427 tree parm_type)
2429 return propagate_vals_across_arith_jfunc (cs,
2430 ipa_get_jf_pass_through_operation (jfunc),
2431 NULL_TREE,
2432 ipa_get_jf_pass_through_operand (jfunc),
2433 src_lat, dest_lat, -1, src_idx, parm_type);
2436 /* Propagate values through an ancestor jump function JFUNC associated with
2437 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2438 is the index of the source parameter. */
2440 static bool
2441 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2442 struct ipa_jump_func *jfunc,
2443 ipcp_lattice<tree> *src_lat,
2444 ipcp_lattice<tree> *dest_lat, int src_idx,
2445 tree param_type)
2447 ipcp_value<tree> *src_val;
2448 bool ret = false;
2450 if (ipa_edge_within_scc (cs))
2451 return dest_lat->set_contains_variable ();
2453 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2455 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2457 if (t && ipacp_value_safe_for_type (param_type, t))
2458 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2459 else
2460 ret |= dest_lat->set_contains_variable ();
2463 return ret;
2466 /* Propagate scalar values across jump function JFUNC that is associated with
2467 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2468 parameter to which the result is passed. */
2470 static bool
2471 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2472 struct ipa_jump_func *jfunc,
2473 ipcp_lattice<tree> *dest_lat,
2474 tree param_type)
2476 if (dest_lat->bottom)
2477 return false;
/* A constant jump function contributes exactly one value, provided it is
   compatible with the parameter type.  */
2479 if (jfunc->type == IPA_JF_CONST)
2481 tree val = ipa_get_jf_constant (jfunc);
2482 if (ipacp_value_safe_for_type (param_type, val))
2483 return dest_lat->add_value (val, cs, NULL, 0);
2484 else
2485 return dest_lat->set_contains_variable ();
2487 else if (jfunc->type == IPA_JF_PASS_THROUGH
2488 || jfunc->type == IPA_JF_ANCESTOR)
2490 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2491 ipcp_lattice<tree> *src_lat;
2492 int src_idx;
2493 bool ret;
2495 if (jfunc->type == IPA_JF_PASS_THROUGH)
2496 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2497 else
2498 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2500 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2501 if (src_lat->bottom)
2502 return dest_lat->set_contains_variable ();
2504 /* If we would need to clone the caller and cannot, do not propagate. */
2505 if (!ipcp_versionable_function_p (cs->caller)
2506 && (src_lat->contains_variable
2507 || (src_lat->values_count > 1)))
2508 return dest_lat->set_contains_variable ();
2510 if (jfunc->type == IPA_JF_PASS_THROUGH)
2511 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2512 dest_lat, src_idx,
2513 param_type);
2514 else
2515 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2516 src_idx, param_type);
/* Variability of the source carries over to the destination, on top of
   any concrete values propagated above.  */
2518 if (src_lat->contains_variable)
2519 ret |= dest_lat->set_contains_variable ();
2521 return ret;
2524 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2525 use it for indirect inlining), we should propagate them too. */
2526 return dest_lat->set_contains_variable ();
2529 /* Propagate polymorphic call contexts across jump function JFUNC that is
2530 associated with edge CS, describing argument IDX, into DEST_LAT. */
2532 static bool
2533 propagate_context_across_jump_function (cgraph_edge *cs,
2534 ipa_jump_func *jfunc, int idx,
2535 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2537 if (dest_lat->bottom)
2538 return false;
2539 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2540 bool ret = false;
2541 bool added_sth = false;
2542 bool type_preserved = true;
/* Context recorded directly on the edge for this argument, if any; it is
   combined with every propagated context below.  */
2544 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2545 = ipa_get_ith_polymorhic_call_context (args, idx);
2547 if (edge_ctx_ptr)
2548 edge_ctx = *edge_ctx_ptr;
2550 if (jfunc->type == IPA_JF_PASS_THROUGH
2551 || jfunc->type == IPA_JF_ANCESTOR)
2553 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2554 int src_idx;
2555 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2557 /* TODO: Once we figure out how to propagate speculations, it will
2558 probably be a good idea to switch to speculation if type_preserved is
2559 not set instead of punting. */
2560 if (jfunc->type == IPA_JF_PASS_THROUGH)
2562 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2563 goto prop_fail;
2564 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2565 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2567 else
2569 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2570 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2573 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2574 /* If we would need to clone the caller and cannot, do not propagate. */
2575 if (!ipcp_versionable_function_p (cs->caller)
2576 && (src_lat->contains_variable
2577 || (src_lat->values_count > 1)))
2578 goto prop_fail;
2580 ipcp_value<ipa_polymorphic_call_context> *src_val;
2581 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2583 ipa_polymorphic_call_context cur = src_val->value;
2585 if (!type_preserved)
2586 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2587 if (jfunc->type == IPA_JF_ANCESTOR)
2588 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2589 /* TODO: In cases we know how the context is going to be used,
2590 we can improve the result by passing proper OTR_TYPE. */
2591 cur.combine_with (edge_ctx);
2592 if (!cur.useless_p ())
2594 if (src_lat->contains_variable
2595 && !edge_ctx.equal_to (cur))
2596 ret |= dest_lat->set_contains_variable ();
2597 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2598 added_sth = true;
/* If nothing could be propagated from the caller's lattice, fall back to
   what the edge itself says about the context.  */
2603 prop_fail:
2604 if (!added_sth)
2606 if (!edge_ctx.useless_p ())
2607 ret |= dest_lat->add_value (edge_ctx, cs);
2608 else
2609 ret |= dest_lat->set_contains_variable ();
2612 return ret;
2615 /* Propagate bits across jfunc that is associated with
2616 edge cs and update dest_lattice accordingly. */
2618 bool
2619 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2620 ipa_jump_func *jfunc,
2621 ipcp_bits_lattice *dest_lattice)
2623 if (dest_lattice->bottom_p ())
2624 return false;
2626 enum availability availability;
2627 cgraph_node *callee = cs->callee->function_symbol (&availability);
2628 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2629 tree parm_type = ipa_get_type (callee_info, idx);
2631 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2632 transform for these cases. Similarly, we can have bad type mismatches
2633 with LTO, avoid doing anything with those too. */
2634 if (!parm_type
2635 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2637 if (dump_file && (dump_flags & TDF_DETAILS))
2638 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2639 "param %i of %s is NULL or unsuitable for bits propagation\n",
2640 idx, cs->callee->dump_name ());
2642 return dest_lattice->set_to_bottom ();
2645 unsigned precision = TYPE_PRECISION (parm_type);
2646 signop sgn = TYPE_SIGN (parm_type);
2648 if (jfunc->type == IPA_JF_PASS_THROUGH
2649 || jfunc->type == IPA_JF_ANCESTOR)
2651 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2652 tree operand = NULL_TREE;
2653 enum tree_code code;
2654 unsigned src_idx;
2655 bool keep_null = false;
2657 if (jfunc->type == IPA_JF_PASS_THROUGH)
2659 code = ipa_get_jf_pass_through_operation (jfunc);
2660 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2661 if (code != NOP_EXPR)
2662 operand = ipa_get_jf_pass_through_operand (jfunc);
2664 else
/* An ancestor jump function is modeled as adding a constant byte offset
   (POINTER_PLUS_EXPR) to the source parameter.  */
2666 code = POINTER_PLUS_EXPR;
2667 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2668 unsigned HOST_WIDE_INT offset
2669 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2670 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2671 operand = build_int_cstu (size_type_node, offset);
2674 class ipcp_param_lattices *src_lats
2675 = ipa_get_parm_lattices (caller_info, src_idx);
2677 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2678 for eg consider:
2679 int f(int x)
2681 g (x & 0xff);
2683 Assume lattice for x is bottom, however we can still propagate
2684 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2685 and we store it in jump function during analysis stage. */
2687 if (!src_lats->bits_lattice.bottom_p ())
2689 bool drop_all_ones
2690 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2692 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2693 sgn, code, operand, drop_all_ones);
/* Fall back to the known-bits information recorded directly in the jump
   function, if any.  */
2697 if (jfunc->bits)
2698 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2699 precision);
2700 else
2701 return dest_lattice->set_to_bottom ();
2704 /* Propagate value range across jump function JFUNC that is associated with
2705 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2706 accordingly. */
2708 static bool
2709 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2710 class ipcp_param_lattices *dest_plats,
2711 tree param_type)
2713 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2715 if (dest_lat->bottom_p ())
2716 return false;
/* Value ranges are only tracked for integral and pointer parameters.  */
2718 if (!param_type
2719 || (!INTEGRAL_TYPE_P (param_type)
2720 && !POINTER_TYPE_P (param_type)))
2721 return dest_lat->set_to_bottom ();
2723 if (jfunc->type == IPA_JF_PASS_THROUGH)
2725 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2726 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2727 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2728 class ipcp_param_lattices *src_lats
2729 = ipa_get_parm_lattices (caller_info, src_idx);
2730 tree operand_type = ipa_get_type (caller_info, src_idx);
2732 if (src_lats->m_value_range.bottom_p ())
2733 return dest_lat->set_to_bottom ();
2735 value_range vr;
2736 if (TREE_CODE_CLASS (operation) == tcc_unary)
2737 ipa_vr_operation_and_type_effects (&vr,
2738 &src_lats->m_value_range.m_vr,
2739 operation, param_type,
2740 operand_type);
2741 /* A crude way to prevent unbounded number of value range updates
2742 in SCC components. We should allow limited number of updates within
2743 SCC, too. */
2744 else if (!ipa_edge_within_scc (cs))
2746 tree op = ipa_get_jf_pass_through_operand (jfunc);
2747 value_range op_vr (op, op);
2748 value_range op_res,res;
2750 range_fold_binary_expr (&op_res, operation, operand_type,
2751 &src_lats->m_value_range.m_vr, &op_vr);
2752 ipa_vr_operation_and_type_effects (&vr,
2753 &op_res,
2754 NOP_EXPR, param_type,
2755 operand_type);
/* Refine the propagated range by intersecting with the range the jump
   function itself recorded, if any; intersection is always sound.  */
2757 if (!vr.undefined_p () && !vr.varying_p ())
2759 if (jfunc->m_vr)
2761 value_range jvr;
2762 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2763 NOP_EXPR,
2764 param_type,
2765 jfunc->m_vr->type ()))
2766 vr.intersect (jvr);
2768 return dest_lat->meet_with (&vr);
2771 else if (jfunc->type == IPA_JF_CONST)
2773 tree val = ipa_get_jf_constant (jfunc);
2774 if (TREE_CODE (val) == INTEGER_CST)
2776 val = fold_convert (param_type, val);
2777 if (TREE_OVERFLOW_P (val))
2778 val = drop_tree_overflow (val);
/* A known integer constant yields the singleton range [val, val].  */
2780 value_range tmpvr (val, val);
2781 return dest_lat->meet_with (&tmpvr);
2785 value_range vr;
2786 if (jfunc->m_vr
2787 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2788 param_type,
2789 jfunc->m_vr->type ()))
2790 return dest_lat->meet_with (&vr);
2791 else
2792 return dest_lat->set_to_bottom ();
2795 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2796 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2797 other cases, return false). If there are no aggregate items, set
2798 aggs_by_ref to NEW_AGGS_BY_REF. */
2800 static bool
2801 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2802 bool new_aggs_by_ref)
2804 if (dest_plats->aggs)
2806 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2808 set_agg_lats_to_bottom (dest_plats);
2809 return true;
2812 else
2813 dest_plats->aggs_by_ref = new_aggs_by_ref;
2814 return false;
2817 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2818 already existing lattice for the given OFFSET and SIZE, marking all skipped
2819 lattices as containing variable and checking for overlaps. If there is no
2820 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2821 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2822 unless there are too many already. If there are two many, return false. If
2823 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2824 skipped lattices were newly marked as containing variable, set *CHANGE to
2825 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2827 static bool
2828 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2829 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2830 struct ipcp_agg_lattice ***aglat,
2831 bool pre_existing, bool *change, int max_agg_items)
2833 gcc_checking_assert (offset >= 0);
/* Skip over lattices that lie entirely before OFFSET, marking each as
   variable.  A partial overlap means inconsistent layouts, which sinks the
   whole parameter to bottom.  */
2835 while (**aglat && (**aglat)->offset < offset)
2837 if ((**aglat)->offset + (**aglat)->size > offset)
2839 set_agg_lats_to_bottom (dest_plats);
2840 return false;
2842 *change |= (**aglat)->set_contains_variable ();
2843 *aglat = &(**aglat)->next;
/* Exact position match: reuse the lattice only if the size matches too.  */
2846 if (**aglat && (**aglat)->offset == offset)
2848 if ((**aglat)->size != val_size)
2850 set_agg_lats_to_bottom (dest_plats);
2851 return false;
2853 gcc_assert (!(**aglat)->next
2854 || (**aglat)->next->offset >= offset + val_size);
2855 return true;
2857 else
2859 struct ipcp_agg_lattice *new_al;
/* A freshly created lattice must fit entirely before the next existing
   one, otherwise the ranges overlap.  */
2861 if (**aglat && (**aglat)->offset < offset + val_size)
2863 set_agg_lats_to_bottom (dest_plats);
2864 return false;
2866 if (dest_plats->aggs_count == max_agg_items)
2867 return false;
2868 dest_plats->aggs_count++;
2869 new_al = ipcp_agg_lattice_pool.allocate ();
2870 memset (new_al, 0, sizeof (*new_al));
2872 new_al->offset = offset;
2873 new_al->size = val_size;
/* A lattice created into a pre-existing set starts as variable, because
   earlier merges did not know about this part.  */
2874 new_al->contains_variable = pre_existing;
2876 new_al->next = **aglat;
2877 **aglat = new_al;
2878 return true;
2882 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2883 containing an unknown value. */
2885 static bool
2886 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2888 bool ret = false;
2889 while (aglat)
2891 ret |= aglat->set_contains_variable ();
2892 aglat = aglat->next;
2894 return ret;
2897 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2898 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2899 parameter used for lattice value sources. Return true if DEST_PLATS changed
2900 in any way. */
2902 static bool
2903 merge_aggregate_lattices (struct cgraph_edge *cs,
2904 class ipcp_param_lattices *dest_plats,
2905 class ipcp_param_lattices *src_plats,
2906 int src_idx, HOST_WIDE_INT offset_delta)
2908 bool pre_existing = dest_plats->aggs != NULL;
2909 struct ipcp_agg_lattice **dst_aglat;
2910 bool ret = false;
/* The by-ref/by-value flavors must agree; a mismatch drops DEST_PLATS to
   bottom, which counts as a change.  */
2912 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2913 return true;
2914 if (src_plats->aggs_bottom)
2915 return set_agg_lats_contain_variable (dest_plats);
2916 if (src_plats->aggs_contain_variable)
2917 ret |= set_agg_lats_contain_variable (dest_plats);
2918 dst_aglat = &dest_plats->aggs;
2920 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2921 param_ipa_max_agg_items);
2922 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2923 src_aglat;
2924 src_aglat = src_aglat->next)
2926 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
/* Parts that would fall before the start of the destination aggregate
   cannot be represented there.  */
2928 if (new_offset < 0)
2929 continue;
2930 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2931 &dst_aglat, pre_existing, &ret, max_agg_items))
2933 struct ipcp_agg_lattice *new_al = *dst_aglat;
2935 dst_aglat = &(*dst_aglat)->next;
2936 if (src_aglat->bottom)
2938 ret |= new_al->set_contains_variable ();
2939 continue;
2941 if (src_aglat->contains_variable)
2942 ret |= new_al->set_contains_variable ();
2943 for (ipcp_value<tree> *val = src_aglat->values;
2944 val;
2945 val = val->next)
2946 ret |= new_al->add_value (val->value, cs, val, src_idx,
2947 src_aglat->offset);
2949 else if (dest_plats->aggs_bottom)
2950 return true;
/* Destination lattices past the last merged part received nothing from the
   source and therefore become variable.  */
2952 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2953 return ret;
2956 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2957 pass-through JFUNC and if so, whether it has conform and conforms to the
2958 rules about propagating values passed by reference. */
2960 static bool
2961 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2962 struct ipa_jump_func *jfunc)
2964 return src_plats->aggs
2965 && (!src_plats->aggs_by_ref
2966 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2969 /* Propagate values through ITEM, jump function for a part of an aggregate,
2970 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2971 associated with the jump function. Return true if AGLAT changed in any
2972 way. */
2974 static bool
2975 propagate_aggregate_lattice (struct cgraph_edge *cs,
2976 struct ipa_agg_jf_item *item,
2977 struct ipcp_agg_lattice *aglat)
2979 class ipa_node_params *caller_info;
2980 class ipcp_param_lattices *src_plats;
2981 struct ipcp_lattice<tree> *src_lat;
2982 HOST_WIDE_INT src_offset;
2983 int src_idx;
2984 tree load_type;
2985 bool ret;
/* A constant part contributes its value directly, with no source value.  */
2987 if (item->jftype == IPA_JF_CONST)
2989 tree value = item->value.constant;
2991 gcc_checking_assert (is_gimple_ip_invariant (value));
2992 return aglat->add_value (value, cs, NULL, 0);
2995 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2996 || item->jftype == IPA_JF_LOAD_AGG);
2998 caller_info = ipa_node_params_sum->get (cs->caller);
2999 src_idx = item->value.pass_through.formal_id;
3000 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
/* For a pass-through the source is the caller parameter's scalar lattice;
   for an aggregate load we must locate a caller aggregate lattice that
   matches the loaded offset, size and by-ref flavor exactly.  */
3002 if (item->jftype == IPA_JF_PASS_THROUGH)
3004 load_type = NULL_TREE;
3005 src_lat = &src_plats->itself;
3006 src_offset = -1;
3008 else
3010 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
3011 struct ipcp_agg_lattice *src_aglat;
3013 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
3014 if (src_aglat->offset >= load_offset)
3015 break;
3017 load_type = item->value.load_agg.type;
3018 if (!src_aglat
3019 || src_aglat->offset > load_offset
3020 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
3021 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
3022 return aglat->set_contains_variable ();
3024 src_lat = src_aglat;
3025 src_offset = load_offset;
/* If we would need to clone the caller and cannot, only a single constant
   in the source is still usable.  */
3028 if (src_lat->bottom
3029 || (!ipcp_versionable_function_p (cs->caller)
3030 && !src_lat->is_single_const ()))
3031 return aglat->set_contains_variable ();
3033 ret = propagate_vals_across_arith_jfunc (cs,
3034 item->value.pass_through.operation,
3035 load_type,
3036 item->value.pass_through.operand,
3037 src_lat, aglat,
3038 src_offset,
3039 src_idx,
3040 item->type);
3042 if (src_lat->contains_variable)
3043 ret |= aglat->set_contains_variable ();
3045 return ret;
3048 /* Propagate aggregate values across jump function JFUNC that is associated
3049 with edge CS and put the values into DEST_PLATS. */
3051 static bool
3052 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
3053 struct ipa_jump_func *jfunc,
3054 class ipcp_param_lattices *dest_plats)
3056 bool ret = false;
3058 if (dest_plats->aggs_bottom)
3059 return false;
/* An operation-less pass-through of the whole aggregate: merge the caller's
   aggregate lattices directly, with no offset adjustment.  */
3061 if (jfunc->type == IPA_JF_PASS_THROUGH
3062 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3064 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3065 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
3066 class ipcp_param_lattices *src_plats;
3068 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3069 if (agg_pass_through_permissible_p (src_plats, jfunc))
3071 /* Currently we do not produce clobber aggregate jump
3072 functions, replace with merging when we do. */
3073 gcc_assert (!jfunc->agg.items);
3074 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
3075 src_idx, 0);
3076 return ret;
/* An ancestor jump function with preserved aggregate: merge with the
   ancestor offset subtracted from every part.  */
3079 else if (jfunc->type == IPA_JF_ANCESTOR
3080 && ipa_get_jf_ancestor_agg_preserved (jfunc))
3082 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
3083 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
3084 class ipcp_param_lattices *src_plats;
3086 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
3087 if (src_plats->aggs && src_plats->aggs_by_ref)
3089 /* Currently we do not produce clobber aggregate jump
3090 functions, replace with merging when we do. */
3091 gcc_assert (!jfunc->agg.items);
3092 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
3093 ipa_get_jf_ancestor_offset (jfunc));
3095 else if (!src_plats->aggs_by_ref)
3096 ret |= set_agg_lats_to_bottom (dest_plats);
3097 else
3098 ret |= set_agg_lats_contain_variable (dest_plats);
3099 return ret;
/* Otherwise propagate the individually described aggregate parts, if the
   jump function recorded any.  */
3102 if (jfunc->agg.items)
3104 bool pre_existing = dest_plats->aggs != NULL;
3105 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
3106 struct ipa_agg_jf_item *item;
3107 int i;
3109 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
3110 return true;
3112 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
3113 param_ipa_max_agg_items);
3114 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
3116 HOST_WIDE_INT val_size;
3118 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
3119 continue;
3120 val_size = tree_to_shwi (TYPE_SIZE (item->type));
3122 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
3123 &aglat, pre_existing, &ret, max_agg_items))
3125 ret |= propagate_aggregate_lattice (cs, item, *aglat);
3126 aglat = &(*aglat)->next;
3128 else if (dest_plats->aggs_bottom)
3129 return true;
3132 ret |= set_chain_of_aglats_contains_variable (*aglat);
3134 else
3135 ret |= set_agg_lats_contain_variable (dest_plats);
3137 return ret;
3140 /* Return true if on the way cfrom CS->caller to the final (non-alias and
3141 non-thunk) destination, the call passes through a thunk. */
3143 static bool
3144 call_passes_through_thunk (cgraph_edge *cs)
3146 cgraph_node *alias_or_thunk = cs->callee;
3147 while (alias_or_thunk->alias)
3148 alias_or_thunk = alias_or_thunk->get_alias_target ();
3149 return alias_or_thunk->thunk;
3152 /* Propagate constants from the caller to the callee of CS. INFO describes the
3153 caller. */
3155 static bool
3156 propagate_constants_across_call (struct cgraph_edge *cs)
3158 class ipa_node_params *callee_info;
3159 enum availability availability;
3160 cgraph_node *callee;
3161 class ipa_edge_args *args;
3162 bool ret = false;
3163 int i, args_count, parms_count;
3165 callee = cs->callee->function_symbol (&availability);
3166 if (!callee->definition)
3167 return false;
3168 gcc_checking_assert (callee->has_gimple_body_p ());
3169 callee_info = ipa_node_params_sum->get (callee);
3170 if (!callee_info)
3171 return false;
3173 args = ipa_edge_args_sum->get (cs);
3174 parms_count = ipa_get_param_count (callee_info);
3175 if (parms_count == 0)
3176 return false;
/* Without jump functions, or with IPA-CP/optimization disabled in the
   caller, nothing is known about the arguments: every callee parameter
   becomes variable.  */
3177 if (!args
3178 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3179 || !opt_for_fn (cs->caller->decl, optimize))
3181 for (i = 0; i < parms_count; i++)
3182 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3183 i));
3184 return ret;
3186 args_count = ipa_get_cs_argument_count (args);
3188 /* If this call goes through a thunk we must not propagate to the first (0th)
3189 parameter. However, we might need to uncover a thunk from below a series
3190 of aliases first. */
3191 if (call_passes_through_thunk (cs))
3193 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
3194 0));
3195 i = 1;
3197 else
3198 i = 0;
3200 for (; (i < args_count) && (i < parms_count); i++)
3202 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3203 class ipcp_param_lattices *dest_plats;
3204 tree param_type = ipa_get_type (callee_info, i);
3206 dest_plats = ipa_get_parm_lattices (callee_info, i);
/* An interposable callee can be replaced at link time, so nothing can be
   assumed about its incoming parameter values.  */
3207 if (availability == AVAIL_INTERPOSABLE)
3208 ret |= set_all_contains_variable (dest_plats);
3209 else
/* Propagate every kind of lattice: scalar values, polymorphic contexts,
   known bits, aggregates and value ranges.  */
3211 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3212 &dest_plats->itself,
3213 param_type);
3214 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3215 &dest_plats->ctxlat);
3217 |= propagate_bits_across_jump_function (cs, i, jump_func,
3218 &dest_plats->bits_lattice);
3219 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3220 dest_plats);
3221 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3222 ret |= propagate_vr_across_jump_function (cs, jump_func,
3223 dest_plats, param_type);
3224 else
3225 ret |= dest_plats->m_value_range.set_to_bottom ();
/* Parameters with no corresponding actual argument become variable.  */
3228 for (; i < parms_count; i++)
3229 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3231 return ret;
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, and known aggregate values in AVS, return the destination
   FUNCTION_DECL.  Set *SPECULATIVE when the returned target is only a
   speculative guess.  */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
                                const vec<tree> &known_csts,
                                const vec<ipa_polymorphic_call_context> &known_contexts,
                                const ipa_argagg_value_list &avs,
                                bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t = NULL;
  tree target = NULL;

  *speculative = false;

  /* The call target is not based on a formal parameter at all; nothing we
     can do.  */
  if (param_index == -1)
    return NULL_TREE;

  /* Non-polymorphic case: the callee is either the parameter itself or
     loaded from an aggregate passed in the parameter.  */
  if (!ie->indirect_info->polymorphic)
    {
      tree t = NULL;

      if (ie->indirect_info->agg_contents)
        {
          t = NULL;
          /* First try to dig the value out of a known constant
             initializer...  */
          if ((unsigned) param_index < known_csts.length ()
              && known_csts[param_index])
            t = ipa_find_agg_cst_from_init (known_csts[param_index],
                                            ie->indirect_info->offset,
                                            ie->indirect_info->by_ref);

          /* ...and failing that, out of known aggregate values, but only if
             the memory is guaranteed not to have been modified.  */
          if (!t && ie->indirect_info->guaranteed_unmodified)
            t = avs.get_value (param_index,
                               ie->indirect_info->offset / BITS_PER_UNIT,
                               ie->indirect_info->by_ref);
        }
      else if ((unsigned) param_index < known_csts.length ())
        t = known_csts[param_index];

      /* Only a direct address of a function gives us a usable target.  */
      if (t
          && TREE_CODE (t) == ADDR_EXPR
          && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
        return TREE_OPERAND (t, 0);
      else
        return NULL_TREE;
    }

  /* Polymorphic (virtual) call from here on.  */
  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  gcc_assert (!ie->indirect_info->by_ref);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  /* Try to work out the value of the virtual table pointer from a known
     constant initializer or from known aggregate values.  */
  if ((unsigned) param_index < known_csts.length ()
      && known_csts[param_index])
    t = ipa_find_agg_cst_from_init (known_csts[param_index],
                                    ie->indirect_info->offset, true);

  if (!t)
    t = avs.get_value (param_index,
                       ie->indirect_info->offset / BITS_PER_UNIT,
                       true);

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
        {
          bool can_refer;
          target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
                                                      vtable, offset, &can_refer);
          if (can_refer)
            {
              if (!target
                  || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
                  || !possible_polymorphic_call_target_p
                       (ie, cgraph_node::get (target)))
                {
                  /* Do not speculate builtin_unreachable, it is stupid!  */
                  if (ie->indirect_info->vptr_changed)
                    return NULL;
                  target = ipa_impossible_devirt_target (ie, target);
                }
              /* If the vptr may have changed, the vtable-based target is only
                 a speculation; otherwise it is definite.  */
              *speculative = ie->indirect_info->vptr_changed;
              if (!*speculative)
                return target;
            }
        }
    }

  /* Do we know the constant value of pointer?  */
  if (!t && (unsigned) param_index < known_csts.length ())
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  /* Build the polymorphic call context from the known context and/or the
     known pointer value, accounting for possible dynamic type changes.  */
  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
        context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
                                              ie->indirect_info->otr_type);
      if (t)
        {
          ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
            (t, ie->indirect_info->otr_type, anc_offset);
          if (!ctx2.useless_p ())
            context.combine_with (ctx2, ie->indirect_info->otr_type);
        }
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
                                              anc_offset);
      if (ie->indirect_info->vptr_changed)
        context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
                                              ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      /* The target list is not conclusive; fall back to speculative
         devirtualization if permitted and profitable.  */
      struct cgraph_node *node;
      if (*speculative)
        return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
          || ie->speculative || !ie->maybe_hot_p ())
        return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
                                               ie->indirect_info->otr_token,
                                               context);
      if (node)
        {
          *speculative = true;
          target = node->decl;
        }
      else
        return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
        target = targets[0]->decl;
      else
        target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  /* Sanity check: never return a target the analysis considers
     impossible.  */
  if (target && !possible_polymorphic_call_target_p (ie,
                                                     cgraph_node::get (target)))
    {
      if (*speculative)
        return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}
3414 /* If an indirect edge IE can be turned into a direct one based on data in
3415 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3416 whether the discovered target is only speculative guess. */
3418 tree
3419 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3420 ipa_call_arg_values *avals,
3421 bool *speculative)
3423 ipa_argagg_value_list avl (avals);
3424 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3425 avals->m_known_contexts,
3426 avl, speculative);
/* Calculate devirtualization time bonus for NODE, assuming we know information
   about arguments stored in AVALS.  The bonus is larger for targets small
   enough to be likely inlined, and halved for merely speculative targets.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
                             ipa_auto_call_arg_values *avals)
{
  struct cgraph_edge *ie;
  int res = 0;

  /* Walk all indirect call sites of NODE and score those that AVALS allows
     us to resolve to a concrete target.  */
  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      ipa_argagg_value_list avl (avals);
      target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
                                               avals->m_known_contexts,
                                               avl, &speculative);
      if (!target)
        continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
        continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
        continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
        continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
         integrating into the cost metrics, at least in some very basic way.  */
      int max_inline_insns_auto
        = opt_for_fn (callee->decl, param_max_inline_insns_auto);
      /* Graduated bonus by target size relative to the auto-inline limit;
         halved (the "+ 1" divisor) when the target is only speculative.  */
      if (size <= max_inline_insns_auto / 4)
        res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
        res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
               || DECL_DECLARED_INLINE_P (callee->decl))
        res += 7 / ((int)speculative + 1);
    }

  return res;
}
3483 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3485 static int
3486 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3488 int result = 0;
3489 ipa_hints hints = estimates.hints;
3490 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3491 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3493 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3495 if (hints & INLINE_HINT_loop_iterations)
3496 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3498 if (hints & INLINE_HINT_loop_stride)
3499 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3501 return result;
3504 /* If there is a reason to penalize the function described by INFO in the
3505 cloning goodness evaluation, do so. */
3507 static inline sreal
3508 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3509 sreal evaluation)
3511 if (info->node_within_scc && !info->node_is_self_scc)
3512 evaluation = (evaluation
3513 * (100 - opt_for_fn (node->decl,
3514 param_ipa_cp_recursion_penalty))) / 100;
3516 if (info->node_calling_single_call)
3517 evaluation = (evaluation
3518 * (100 - opt_for_fn (node->decl,
3519 param_ipa_cp_single_call_penalty)))
3520 / 100;
3522 return evaluation;
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQUENCIES.  When profile counts are available,
   COUNT_SUM is used instead of FREQ_SUM.  */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
                            sreal freq_sum, profile_count count_sum,
                            int size_cost)
{
  /* No benefit, cloning disabled, or size-optimized function: never
     worthwhile.  */
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  ipa_node_params *info = ipa_node_params_sum->get (node);
  int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  if (count_sum.nonzero_p ())
    {
      /* Profile-feedback path: weight the benefit by the fraction of
         base_count the incoming edges represent.  */
      gcc_assert (base_count.nonzero_p ());
      sreal factor = count_sum.probability_in (base_count).to_sreal ();
      sreal evaluation = (time_benefit * factor) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
                   "size: %i, count_sum: ", time_benefit.to_double (),
                   size_cost);
          count_sum.dump (dump_file);
          fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
                   info->node_within_scc
                   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
                   info->node_calling_single_call ? ", single_call" : "",
                   evaluation.to_double (), eval_threshold);
        }

      return evaluation.to_int () >= eval_threshold;
    }
  else
    {
      /* No profile: weight the benefit by estimated edge frequencies.  */
      sreal evaluation = (time_benefit * freq_sum) / size_cost;
      evaluation = incorporate_penalties (node, info, evaluation);
      evaluation *= 1000;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
                 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
                 "threshold: %i\n",
                 time_benefit.to_double (), size_cost, freq_sum.to_double (),
                 info->node_within_scc
                 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
                 info->node_calling_single_call ? ", single_call" : "",
                 evaluation.to_double (), eval_threshold);

      return evaluation.to_int () >= eval_threshold;
    }
}
/* Grow vectors in AVALS and fill them with information about values of
   parameters that are known to be independent of the context.  Only calculate
   m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
   REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
   parameters will be stored in it.  Return true if any context-independent
   scalar value was found.

   TODO: Also grow context independent value range vectors.  */

static bool
gather_context_independent_values (class ipa_node_params *info,
                                   ipa_auto_call_arg_values *avals,
                                   bool calculate_aggs,
                                   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  avals->m_known_vals.safe_grow_cleared (count, true);
  avals->m_known_contexts.safe_grow_cleared (count, true);

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
        {
          /* Exactly one known value: record it and count the cost of the
             (now removable) parameter's move.  */
          ipcp_value<tree> *val = lat->values;
          gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
          avals->m_known_vals[i] = val->value;
          if (removable_params_cost)
            *removable_params_cost
              += estimate_move_cost (TREE_TYPE (val->value), false);
          ret = true;
        }
      /* An unused parameter is removable regardless of its value.  */
      else if (removable_params_cost
               && !ipa_is_param_used (info, i))
        *removable_params_cost
          += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
        continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account known context as reason for cloning.  We can see
         if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
        avals->m_known_contexts[i] = ctxlat->values->value;

      if (calculate_aggs)
        ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
    }

  return ret;
}
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement of the considered parameter.  */

static void
perform_estimation_of_a_value (cgraph_node *node,
                               ipa_auto_call_arg_values *avals,
                               int removable_params_cost, int est_move_cost,
                               ipcp_value_base *val)
{
  sreal time_benefit;
  ipa_call_estimates estimates;

  estimate_ipcp_clone_size_and_time (node, avals, &estimates);

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    /* Benefit = estimated time saved plus devirtualization/hint bonuses
       plus the cost of parameters that the clone can drop.  */
    time_benefit = (estimates.nonspecialized_time - estimates.time)
      + (devirtualization_time_bonus (node, avals)
         + hint_time_bonus (node, estimates)
         + removable_params_cost + est_move_cost);

  int size = estimates.size;
  gcc_checking_assert (size >=0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}
3686 /* Get the overall limit oof growth based on parameters extracted from growth.
3687 it does not really make sense to mix functions with different overall growth
3688 limits but it is possible and if it happens, we do not want to select one
3689 limit at random. */
3691 static long
3692 get_max_overall_size (cgraph_node *node)
3694 long max_new_size = orig_overall_size;
3695 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3696 if (max_new_size < large_unit)
3697 max_new_size = large_unit;
3698 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3699 max_new_size += max_new_size * unit_growth / 100 + 1;
3700 return max_new_size;
3703 /* Return true if NODE should be cloned just for a parameter removal, possibly
3704 dumping a reason if not. */
3706 static bool
3707 clone_for_param_removal_p (cgraph_node *node)
3709 if (!node->can_change_signature)
3711 if (dump_file && (dump_flags & TDF_DETAILS))
3712 fprintf (dump_file, " Not considering cloning to remove parameters, "
3713 "function cannot change signature.\n");
3714 return false;
3716 if (node->can_be_local_p ())
3718 if (dump_file && (dump_flags & TDF_DETAILS))
3719 fprintf (dump_file, " Not considering cloning to remove parameters, "
3720 "IPA-SRA can do it potentially better.\n");
3721 return false;
3723 return true;
/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  First evaluate cloning for
   all context-independent values at once, then estimate each individual
   scalar value, polymorphic context and aggregate value separately.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (info);
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  ipa_auto_call_arg_values avals;
  always_const = gather_context_independent_values (info, &avals, true,
                                                    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, &avals);
  /* Stage 1: consider specializing for all known contexts at once.  */
  if (always_const || devirt_bonus
      || (removable_params_cost && clone_for_param_removal_p (node)))
    {
      struct caller_statistics stats;
      ipa_call_estimates estimates;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
                                                false);
      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
      sreal time = estimates.nonspecialized_time - estimates.time;
      time += devirt_bonus;
      time += hint_time_bonus (node, estimates);
      time += removable_params_cost;
      /* Removing parameters also shrinks every call site.  */
      int size = estimates.size - stats.n_calls * removable_params_cost;

      if (dump_file)
        fprintf (dump_file, " - context independent values, size: %i, "
                 "time_benefit: %f\n", size, (time).to_double ());

      if (size <= 0 || node->local)
        {
          info->do_clone_for_all_contexts = true;

          if (dump_file)
            fprintf (dump_file, " Decided to specialize for all "
                     "known contexts, code not going to grow.\n");
        }
      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
                                           stats.count_sum, size))
        {
          if (size + overall_size <= get_max_overall_size (node))
            {
              info->do_clone_for_all_contexts = true;
              overall_size += size;

              if (dump_file)
                fprintf (dump_file, " Decided to specialize for all "
                         "known contexts, growth (to %li) deemed "
                         "beneficial.\n", overall_size);
            }
          else if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " Not cloning for all contexts because "
                     "maximum unit size would be reached with %li.\n",
                     size + overall_size);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " Not cloning for all contexts because "
                 "!good_cloning_opportunity_p.\n");
    }

  /* Stage 2: estimate each candidate scalar constant separately by
     temporarily pretending it is the known value of its parameter.  */
  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
          || !lat->values
          || avals.m_known_vals[i])
        continue;

      for (val = lat->values; val; val = val->next)
        {
          gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
          avals.m_known_vals[i] = val->value;

          int emc = estimate_move_cost (TREE_TYPE (val->value), true);
          perform_estimation_of_a_value (node, &avals, removable_params_cost,
                                         emc, val);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " - estimates for value ");
              print_ipcp_constant_value (dump_file, val->value);
              fprintf (dump_file, " for ");
              ipa_dump_param (dump_file, info, i);
              fprintf (dump_file, ": time_benefit: %g, size: %i\n",
                       val->local_time_benefit.to_double (),
                       val->local_size_cost);
            }
        }
      /* Restore the vector for the next parameter.  */
      avals.m_known_vals[i] = NULL_TREE;
    }

  /* Stage 3: likewise for each candidate polymorphic context.  */
  for (int i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
        continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
          || !ctxlat->values
          || !avals.m_known_contexts[i].useless_p ())
        continue;

      for (val = ctxlat->values; val; val = val->next)
        {
          avals.m_known_contexts[i] = val->value;
          perform_estimation_of_a_value (node, &avals, removable_params_cost,
                                         0, val);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " - estimates for polymorphic context ");
              print_ipcp_constant_value (dump_file, val->value);
              fprintf (dump_file, " for ");
              ipa_dump_param (dump_file, info, i);
              fprintf (dump_file, ": time_benefit: %g, size: %i\n",
                       val->local_time_benefit.to_double (),
                       val->local_size_cost);
            }
        }
      avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
    }

  /* Stage 4: aggregate values.  Save the context-independent aggregate
     values aside and keep a one-slot gap in m_known_aggs so a candidate
     value can be inserted at its sorted (index, unit_offset) position.  */
  unsigned all_ctx_len = avals.m_known_aggs.length ();
  auto_vec<ipa_argagg_value, 32> all_ctx;
  all_ctx.reserve_exact (all_ctx_len);
  all_ctx.splice (avals.m_known_aggs);
  avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);

  unsigned j = 0;
  for (int index = 0; index < count; index++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);

      if (plats->aggs_bottom || !plats->aggs)
        continue;

      for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
        {
          ipcp_value<tree> *val;
          if (aglat->bottom || !aglat->values
              /* If the following is true, the one value is already part of all
                 context estimations.  */
              || (!plats->aggs_contain_variable
                  && aglat->is_single_const ()))
            continue;

          unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
          /* Copy the saved values that sort before the candidate slot.  */
          while (j < all_ctx_len
                 && (all_ctx[j].index < index
                     || (all_ctx[j].index == index
                         && all_ctx[j].unit_offset < unit_offset)))
            {
              avals.m_known_aggs[j] = all_ctx[j];
              j++;
            }
          /* And shift the remaining saved values one slot up, past the
             candidate's position.  */
          for (unsigned k = j; k < all_ctx_len; k++)
            avals.m_known_aggs[k+1] = all_ctx[k];

          for (val = aglat->values; val; val = val->next)
            {
              avals.m_known_aggs[j].value = val->value;
              avals.m_known_aggs[j].unit_offset = unit_offset;
              avals.m_known_aggs[j].index = index;
              avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;

              perform_estimation_of_a_value (node, &avals,
                                             removable_params_cost, 0, val);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " - estimates for value ");
                  print_ipcp_constant_value (dump_file, val->value);
                  fprintf (dump_file, " for ");
                  ipa_dump_param (dump_file, info, index);
                  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
                           "]: time_benefit: %g, size: %i\n",
                           plats->aggs_by_ref ? "ref " : "",
                           aglat->offset,
                           val->local_time_benefit.to_double (),
                           val->local_size_cost);
                }
            }
        }
    }
}
/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  This is Tarjan's strongly-connected-component
   algorithm: dfs/low_link discover SCCs, members of a finished SCC are linked
   via scc_next and the SCC representatives via topo_next onto values_topo.  */

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  /* Already visited.  */
  if (cur_val->dfs)
    return;

  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  /* Push onto the DFS stack.  */
  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  /* Recurse into the values this one was derived from.  */
  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
        if (src->val->dfs == 0)
          {
            add_val (src->val);
            if (src->val->low_link < cur_val->low_link)
              cur_val->low_link = src->val->low_link;
          }
        else if (src->val->on_stack
                 && src->val->dfs < cur_val->low_link)
          cur_val->low_link = src->val->dfs;
      }

  /* CUR_VAL is the root of an SCC: pop its members off the stack and chain
     the SCC into the topological order.  */
  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
        {
          v = stack;
          stack = v->topo_next;
          v->on_stack = false;
          v->scc_no = cur_val->dfs;

          v->scc_next = scc_list;
          scc_list = v;
        }
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}
/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.  Covers scalar, aggregate and polymorphic-context
   lattices of every parameter.  */

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      /* Scalar values of this parameter.  */
      if (!lat->bottom)
        {
          ipcp_value<tree> *val;
          for (val = lat->values; val; val = val->next)
            topo->constants.add_val (val);
        }

      /* Values known for parts of aggregates passed in this parameter.  */
      if (!plats->aggs_bottom)
        for (aglat = plats->aggs; aglat; aglat = aglat->next)
          if (!aglat->bottom)
            {
              ipcp_value<tree> *val;
              for (val = aglat->values; val; val = val->next)
                topo->constants.add_val (val);
            }

      /* Polymorphic call contexts.  */
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
        {
          ipcp_value<ipa_polymorphic_call_context> *ctxval;
          for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
            topo->contexts.add_val (ctxval);
        }
    }
}
/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (class ipa_topo_info *topo)
{
  int i;

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
         until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        if (v->has_gimple_body_p ())
          {
            if (opt_for_fn (v->decl, flag_ipa_cp)
                && opt_for_fn (v->decl, optimize))
              push_node_to_stack (topo, v);
            /* When V is not optimized, we can not push it to stack, but
               still we need to set all its callees lattices to bottom.  */
            else
              {
                for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
                  propagate_constants_across_call (cs);
              }
          }

      /* Work-list iteration: re-process a callee within the SCC whenever a
         propagation changed any of its lattices.  */
      v = pop_node_from_stack (topo);
      while (v)
        {
          struct cgraph_edge *cs;
          class ipa_node_params *info = NULL;
          bool self_scc = true;

          for (cs = v->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs))
              {
                cgraph_node *callee = cs->callee->function_symbol ();

                if (v != callee)
                  self_scc = false;

                if (!info)
                  {
                    info = ipa_node_params_sum->get (v);
                    info->node_within_scc = true;
                  }

                if (propagate_constants_across_call (cs))
                  push_node_to_stack (topo, callee);
              }

          if (info)
            info->node_is_self_scc = self_scc;

          v = pop_node_from_stack (topo);
        }

      /* Afterwards, propagate along edges leading out of the SCC, calculates
         the local effects of the discovered constants and all valid values to
         their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        if (v->has_gimple_body_p ()
            && opt_for_fn (v->decl, flag_ipa_cp)
            && opt_for_fn (v->decl, optimize))
          {
            struct cgraph_edge *cs;

            estimate_local_effects (v);
            add_all_node_vals_to_toposort (v, topo);
            for (cs = v->callees; cs; cs = cs->next_callee)
              if (!ipa_edge_within_scc (cs))
                propagate_constants_across_call (cs);
          }
      cycle_nodes.release ();
    }
}
/* Propagate the estimated effects of individual values along the topological
   order, from the dependent values to those they depend on.  Each SCC's
   accumulated time/size is added to the source values that generated its
   members, weighted by edge frequency.  */

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  ipcp_value<valtype> *base;
  hash_set<ipcp_value<valtype> *> processed_srcvals;

  for (base = values_topo; base; base = base->topo_next)
    {
      ipcp_value_source<valtype> *src;
      ipcp_value<valtype> *val;
      sreal time = 0;
      HOST_WIDE_INT size = 0;

      /* Sum local and already-propagated effects over the whole SCC.  */
      for (val = base; val; val = val->scc_next)
        {
          time = time + val->local_time_benefit + val->prop_time_benefit;
          size = size + val->local_size_cost + val->prop_size_cost;
        }

      for (val = base; val; val = val->scc_next)
        {
          processed_srcvals.empty ();
          for (src = val->sources; src; src = src->next)
            if (src->val
                && src->cs->maybe_hot_p ())
              {
                /* Add the size only once per source value (guarded by the
                   hash set), and skip the source entirely on overflow.  */
                if (!processed_srcvals.add (src->val))
                  {
                    HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
                    if (prop_size < INT_MAX)
                      src->val->prop_size_cost = prop_size;
                    else
                      continue;
                  }

                /* Scale the time benefit up for recursion-related values.  */
                int special_factor = 1;
                if (val->same_scc (src->val))
                  special_factor
                    = opt_for_fn(src->cs->caller->decl,
                                 param_ipa_cp_recursive_freq_factor);
                else if (val->self_recursion_generated_p ()
                         && (src->cs->callee->function_symbol ()
                             == src->cs->caller))
                  {
                    int max_recur_gen_depth
                      = opt_for_fn(src->cs->caller->decl,
                                   param_ipa_cp_max_recursive_depth);
                    special_factor = max_recur_gen_depth
                      - val->self_recursion_generated_level + 1;
                  }

                src->val->prop_time_benefit
                  += time * special_factor * src->cs->sreal_frequency ();
              }

          /* Commit the SCC totals to this value, zeroing them out if the
             size sum overflowed.  */
          if (size < INT_MAX)
            {
              val->prop_time_benefit = time;
              val->prop_size_cost = size;
            }
          else
            {
              val->prop_time_benefit = 0;
              val->prop_size_cost = 0;
            }
        }
    }
}
4186 /* Callback for qsort to sort counts of all edges. */
4188 static int
4189 compare_edge_profile_counts (const void *a, const void *b)
4191 const profile_count *cnt1 = (const profile_count *) a;
4192 const profile_count *cnt2 = (const profile_count *) b;
4194 if (*cnt1 < *cnt2)
4195 return 1;
4196 if (*cnt1 > *cnt2)
4197 return -1;
4198 return 0;
/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.  Also initializes per-node lattices, computes
   the initial overall unit size, and selects base_count for profile-based
   evaluation before running the actual propagation over TOPO.  */

static void
ipcp_propagate_stage (class ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  base_count = profile_count::uninitialized ();

  bool compute_count_base = false;
  unsigned base_count_pos_percent = 0;
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    /* Allocate and initialize lattices for every analyzable function.  */
    if (node->has_gimple_body_p ()
        && opt_for_fn (node->decl, flag_ipa_cp)
        && opt_for_fn (node->decl, optimize))
      {
        ipa_node_params *info = ipa_node_params_sum->get (node);
        determine_versionability (node, info);

        unsigned nlattices = ipa_get_param_count (info);
        void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
        info->lattices = new (chunk) ipcp_param_lattices[nlattices];
        initialize_node_lattices (node);
      }
    /* Accumulate the initial size of the whole unit.  */
    ipa_size_summary *s = ipa_size_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    if (node->count.ipa ().initialized_p ())
      {
        compute_count_base = true;
        unsigned pos_percent = opt_for_fn (node->decl,
                                           param_ipa_cp_profile_count_base);
        base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
      }
  }

  if (compute_count_base)
    {
      /* Pick base_count as the count at the configured percentile position
         among all non-zero counts of edges into versionable functions.  */
      auto_vec<profile_count> all_edge_counts;
      all_edge_counts.reserve_exact (symtab->edges_count);
      FOR_EACH_DEFINED_FUNCTION (node)
        for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
          {
            profile_count count = cs->count.ipa ();
            if (!count.nonzero_p ())
              continue;

            enum availability avail;
            cgraph_node *tgt
              = cs->callee->function_or_virtual_thunk_symbol (&avail);
            ipa_node_params *info = ipa_node_params_sum->get (tgt);
            if (info && info->versionable)
              all_edge_counts.quick_push (count);
          }

      if (!all_edge_counts.is_empty ())
        {
          gcc_assert (base_count_pos_percent <= 100);
          all_edge_counts.qsort (compare_edge_profile_counts);

          unsigned base_count_pos
            = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
          base_count = all_edge_counts[base_count_pos];

          if (dump_file)
            {
              fprintf (dump_file, "\nSelected base_count from %u edges at "
                       "position %u, arriving at: ", all_edge_counts.length (),
                       base_count_pos);
              base_count.dump (dump_file);
              fprintf (dump_file, "\n");
            }
        }
      else if (dump_file)
        fprintf (dump_file, "\nNo candidates with non-zero call count found, "
                 "continuing as if without profile feedback.\n");
    }

  orig_overall_size = overall_size;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li\n", overall_size);

  /* Run the actual propagation and, with checking, verify the result.  */
  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}
4303 /* Discover newly direct outgoing edges from NODE which is a new clone with
4304 known KNOWN_CSTS and make them direct. */
4306 static void
4307 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4308 vec<tree> known_csts,
4309 vec<ipa_polymorphic_call_context>
4310 known_contexts,
4311 vec<ipa_argagg_value, va_gc> *aggvals)
4313 struct cgraph_edge *ie, *next_ie;
4314 bool found = false;
4316 for (ie = node->indirect_calls; ie; ie = next_ie)
4318 tree target;
4319 bool speculative;
4321 next_ie = ie->next_callee;
4322 ipa_argagg_value_list avs (aggvals);
4323 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4324 avs, &speculative);
4325 if (target)
4327 bool agg_contents = ie->indirect_info->agg_contents;
4328 bool polymorphic = ie->indirect_info->polymorphic;
4329 int param_index = ie->indirect_info->param_index;
4330 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4331 speculative);
4332 found = true;
4334 if (cs && !agg_contents && !polymorphic)
4336 ipa_node_params *info = ipa_node_params_sum->get (node);
4337 int c = ipa_get_controlled_uses (info, param_index);
4338 if (c != IPA_UNDESCRIBED_USE
4339 && !ipa_get_param_load_dereferenced (info, param_index))
4341 struct ipa_ref *to_del;
4343 c--;
4344 ipa_set_controlled_uses (info, param_index, c);
4345 if (dump_file && (dump_flags & TDF_DETAILS))
4346 fprintf (dump_file, " controlled uses count of param "
4347 "%i bumped down to %i\n", param_index, c);
4348 if (c == 0
4349 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4351 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, " and even removing its "
4353 "cloning-created reference\n");
4354 to_del->remove_reference ();
4360 /* Turning calls to direct calls will improve overall summary. */
4361 if (found)
4362 ipa_update_overall_fn_summary (node);
4365 class edge_clone_summary;
4366 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4368 /* Edge clone summary. */
4370 class edge_clone_summary
4372 public:
4373 /* Default constructor. */
4374 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4376 /* Default destructor. */
4377 ~edge_clone_summary ()
4379 if (prev_clone)
4380 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4381 if (next_clone)
4382 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4385 cgraph_edge *prev_clone;
4386 cgraph_edge *next_clone;
4389 class edge_clone_summary_t:
4390 public call_summary <edge_clone_summary *>
4392 public:
4393 edge_clone_summary_t (symbol_table *symtab):
4394 call_summary <edge_clone_summary *> (symtab)
4396 m_initialize_when_cloning = true;
4399 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4400 edge_clone_summary *src_data,
4401 edge_clone_summary *dst_data) final override;
4404 /* Edge duplication hook. */
4406 void
4407 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4408 edge_clone_summary *src_data,
4409 edge_clone_summary *dst_data)
4411 if (src_data->next_clone)
4412 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4413 dst_data->prev_clone = src_edge;
4414 dst_data->next_clone = src_data->next_clone;
4415 src_data->next_clone = dst_edge;
4418 /* Return true is CS calls DEST or its clone for all contexts. When
4419 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4420 edges from/to an all-context clone. */
4422 static bool
4423 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4424 bool allow_recursion_to_clone)
4426 enum availability availability;
4427 cgraph_node *callee = cs->callee->function_symbol (&availability);
4429 if (availability <= AVAIL_INTERPOSABLE)
4430 return false;
4431 if (callee == dest)
4432 return true;
4433 if (!allow_recursion_to_clone && cs->caller == callee)
4434 return false;
4436 ipa_node_params *info = ipa_node_params_sum->get (callee);
4437 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4440 /* Return true if edge CS does bring about the value described by SRC to
4441 DEST_VAL of node DEST or its clone for all contexts. */
4443 static bool
4444 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4445 cgraph_node *dest, ipcp_value<tree> *dest_val)
4447 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4449 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4450 || caller_info->node_dead)
4451 return false;
4453 if (!src->val)
4454 return true;
4456 if (caller_info->ipcp_orig_node)
4458 tree t = NULL_TREE;
4459 if (src->offset == -1)
4460 t = caller_info->known_csts[src->index];
4461 else if (ipcp_transformation *ts
4462 = ipcp_get_transformation_summary (cs->caller))
4464 ipa_argagg_value_list avl (ts);
4465 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4467 return (t != NULL_TREE
4468 && values_equal_for_ipcp_p (src->val->value, t));
4470 else
4472 if (src->val == dest_val)
4473 return true;
4475 struct ipcp_agg_lattice *aglat;
4476 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4477 src->index);
4478 if (src->offset == -1)
4479 return (plats->itself.is_single_const ()
4480 && values_equal_for_ipcp_p (src->val->value,
4481 plats->itself.values->value));
4482 else
4484 if (plats->aggs_bottom || plats->aggs_contain_variable)
4485 return false;
4486 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4487 if (aglat->offset == src->offset)
4488 return (aglat->is_single_const ()
4489 && values_equal_for_ipcp_p (src->val->value,
4490 aglat->values->value));
4492 return false;
4496 /* Return true if edge CS does bring about the value described by SRC to
4497 DST_VAL of node DEST or its clone for all contexts. */
4499 static bool
4500 cgraph_edge_brings_value_p (cgraph_edge *cs,
4501 ipcp_value_source<ipa_polymorphic_call_context> *src,
4502 cgraph_node *dest,
4503 ipcp_value<ipa_polymorphic_call_context> *)
4505 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4507 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4508 || caller_info->node_dead)
4509 return false;
4510 if (!src->val)
4511 return true;
4513 if (caller_info->ipcp_orig_node)
4514 return (caller_info->known_contexts.length () > (unsigned) src->index)
4515 && values_equal_for_ipcp_p (src->val->value,
4516 caller_info->known_contexts[src->index]);
4518 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4519 src->index);
4520 return plats->ctxlat.is_single_const ()
4521 && values_equal_for_ipcp_p (src->val->value,
4522 plats->ctxlat.values->value);
4525 /* Get the next clone in the linked list of clones of an edge. */
4527 static inline struct cgraph_edge *
4528 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4530 edge_clone_summary *s = edge_clone_summaries->get (cs);
4531 return s != NULL ? s->next_clone : NULL;
4534 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4535 of them is viable and hot, return true. In that case, for those that still
4536 hold, add their edge frequency and their number and cumulative profile
4537 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4538 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4540 template <typename valtype>
4541 static bool
4542 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4543 sreal *freq_sum, int *caller_count,
4544 profile_count *rec_count_sum,
4545 profile_count *nonrec_count_sum)
4547 ipcp_value_source<valtype> *src;
4548 sreal freq = 0;
4549 int count = 0;
4550 profile_count rec_cnt = profile_count::zero ();
4551 profile_count nonrec_cnt = profile_count::zero ();
4552 bool hot = false;
4553 bool non_self_recursive = false;
4555 for (src = val->sources; src; src = src->next)
4557 struct cgraph_edge *cs = src->cs;
4558 while (cs)
4560 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4562 count++;
4563 freq += cs->sreal_frequency ();
4564 hot |= cs->maybe_hot_p ();
4565 if (cs->caller != dest)
4567 non_self_recursive = true;
4568 if (cs->count.ipa ().initialized_p ())
4569 rec_cnt += cs->count.ipa ();
4571 else if (cs->count.ipa ().initialized_p ())
4572 nonrec_cnt += cs->count.ipa ();
4574 cs = get_next_cgraph_edge_clone (cs);
4578 /* If the only edges bringing a value are self-recursive ones, do not bother
4579 evaluating it. */
4580 if (!non_self_recursive)
4581 return false;
4583 *freq_sum = freq;
4584 *caller_count = count;
4585 *rec_count_sum = rec_cnt;
4586 *nonrec_count_sum = nonrec_cnt;
4588 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4590 struct cgraph_edge *cs;
4592 /* Cold non-SCC source edge could trigger hot recursive execution of
4593 function. Consider the case as hot and rely on following cost model
4594 computation to further select right one. */
4595 for (cs = dest->callers; cs; cs = cs->next_caller)
4596 if (cs->caller == dest && cs->maybe_hot_p ())
4597 return true;
4600 return hot;
4603 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4604 to let a non-self-recursive caller be the first element. Thus, we can
4605 simplify intersecting operations on values that arrive from all of these
4606 callers, especially when there exists self-recursive call. Return true if
4607 this kind of adjustment is possible. */
4609 static bool
4610 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4611 cgraph_node *node)
4613 for (unsigned i = 0; i < callers.length (); i++)
4615 cgraph_edge *cs = callers[i];
4617 if (cs->caller != node)
4619 if (i > 0)
4621 callers[i] = callers[0];
4622 callers[0] = cs;
4624 return true;
4627 return false;
4630 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4631 is assumed their number is known and equal to CALLER_COUNT. */
4633 template <typename valtype>
4634 static vec<cgraph_edge *>
4635 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4636 int caller_count)
4638 ipcp_value_source<valtype> *src;
4639 vec<cgraph_edge *> ret;
4641 ret.create (caller_count);
4642 for (src = val->sources; src; src = src->next)
4644 struct cgraph_edge *cs = src->cs;
4645 while (cs)
4647 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4648 ret.quick_push (cs);
4649 cs = get_next_cgraph_edge_clone (cs);
4653 if (caller_count > 1)
4654 adjust_callers_for_value_intersection (ret, dest);
4656 return ret;
4659 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4660 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4661 should be set to true when the reference created for the constant should be
4662 a load one and not an address one because the corresponding parameter p is
4663 only used as *p. */
4665 static struct ipa_replace_map *
4666 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4667 bool force_load_ref)
4669 struct ipa_replace_map *replace_map;
4671 replace_map = ggc_alloc<ipa_replace_map> ();
4672 if (dump_file)
4674 fprintf (dump_file, " replacing ");
4675 ipa_dump_param (dump_file, info, parm_num);
4677 fprintf (dump_file, " with const ");
4678 print_generic_expr (dump_file, value);
4680 if (force_load_ref)
4681 fprintf (dump_file, " - forcing load reference\n");
4682 else
4683 fprintf (dump_file, "\n");
4685 replace_map->parm_num = parm_num;
4686 replace_map->new_tree = value;
4687 replace_map->force_load_ref = force_load_ref;
4688 return replace_map;
4691 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4692 one, otherwise it will be referred to as the original node. */
4694 static void
4695 dump_profile_updates (cgraph_node *node, bool spec)
4697 if (spec)
4698 fprintf (dump_file, " setting count of the specialized node %s to ",
4699 node->dump_name ());
4700 else
4701 fprintf (dump_file, " setting count of the original node %s to ",
4702 node->dump_name ());
4704 node->count.dump (dump_file);
4705 fprintf (dump_file, "\n");
4706 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4708 fprintf (dump_file, " edge to %s has count ",
4709 cs->callee->dump_name ());
4710 cs->count.dump (dump_file);
4711 fprintf (dump_file, "\n");
4715 /* With partial train run we do not want to assume that original's count is
4716 zero whenever we redurect all executed edges to clone. Simply drop profile
4717 to local one in this case. In eany case, return the new value. ORIG_NODE
4718 is the original node and its count has not been updaed yet. */
4720 profile_count
4721 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4723 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4724 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4725 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4726 remainder = remainder.guessed_local ();
4728 return remainder;
4731 /* Structure to sum counts coming from nodes other than the original node and
4732 its clones. */
4734 struct gather_other_count_struct
4736 cgraph_node *orig;
4737 profile_count other_count;
4740 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4741 counts that come from non-self-recursive calls.. */
4743 static bool
4744 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4746 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4747 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4748 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4749 desc->other_count += cs->count.ipa ();
4750 return false;
4753 /* Structure to help analyze if we need to boost counts of some clones of some
4754 non-recursive edges to match the new callee count. */
4756 struct desc_incoming_count_struct
4758 cgraph_node *orig;
4759 hash_set <cgraph_edge *> *processed_edges;
4760 profile_count count;
4761 unsigned unproc_orig_rec_edges;
4764 /* Go over edges calling NODE and its thunks and gather information about
4765 incoming counts so that we know if we need to make any adjustments. */
4767 static void
4768 analyze_clone_icoming_counts (cgraph_node *node,
4769 desc_incoming_count_struct *desc)
4771 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4772 if (cs->caller->thunk)
4774 analyze_clone_icoming_counts (cs->caller, desc);
4775 continue;
4777 else
4779 if (cs->count.initialized_p ())
4780 desc->count += cs->count.ipa ();
4781 if (!desc->processed_edges->contains (cs)
4782 && cs->caller->clone_of == desc->orig)
4783 desc->unproc_orig_rec_edges++;
4787 /* If caller edge counts of a clone created for a self-recursive arithmetic
4788 jump function must be adjusted because it is coming from a the "seed" clone
4789 for the first value and so has been excessively scaled back as if it was not
4790 a recursive call, adjust it so that the incoming counts of NODE match its
4791 count. NODE is the node or its thunk. */
4793 static void
4794 adjust_clone_incoming_counts (cgraph_node *node,
4795 desc_incoming_count_struct *desc)
4797 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4798 if (cs->caller->thunk)
4800 adjust_clone_incoming_counts (cs->caller, desc);
4801 profile_count sum = profile_count::zero ();
4802 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4803 if (e->count.initialized_p ())
4804 sum += e->count.ipa ();
4805 cs->count = cs->count.combine_with_ipa_count (sum);
4807 else if (!desc->processed_edges->contains (cs)
4808 && cs->caller->clone_of == desc->orig)
4810 cs->count += desc->count;
4811 if (dump_file)
4813 fprintf (dump_file, " Adjusted count of an incoming edge of "
4814 "a clone %s -> %s to ", cs->caller->dump_name (),
4815 cs->callee->dump_name ());
4816 cs->count.dump (dump_file);
4817 fprintf (dump_file, "\n");
4822 /* When ORIG_NODE has been cloned for values which have been generated fora
4823 self-recursive call as a result of an arithmetic pass-through
4824 jump-functions, adjust its count together with counts of all such clones in
4825 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4827 The function sums the counts of the original node and all its clones that
4828 cannot be attributed to a specific clone because it comes from a
4829 non-recursive edge. This sum is then evenly divided between the clones and
4830 on top of that each one gets all the counts which can be attributed directly
4831 to it. */
4833 static void
4834 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4835 const vec<cgraph_node *> &self_gen_clones)
4837 profile_count redist_sum = orig_node->count.ipa ();
4838 if (!(redist_sum > profile_count::zero ()))
4839 return;
4841 if (dump_file)
4842 fprintf (dump_file, " Updating profile of self recursive clone "
4843 "series\n");
4845 gather_other_count_struct gocs;
4846 gocs.orig = orig_node;
4847 gocs.other_count = profile_count::zero ();
4849 auto_vec <profile_count, 8> other_edges_count;
4850 for (cgraph_node *n : self_gen_clones)
4852 gocs.other_count = profile_count::zero ();
4853 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4854 &gocs, false);
4855 other_edges_count.safe_push (gocs.other_count);
4856 redist_sum -= gocs.other_count;
4859 hash_set<cgraph_edge *> processed_edges;
4860 unsigned i = 0;
4861 for (cgraph_node *n : self_gen_clones)
4863 profile_count orig_count = n->count;
4864 profile_count new_count
4865 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4866 new_count = lenient_count_portion_handling (new_count, orig_node);
4867 n->count = new_count;
4868 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4869 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4871 cs->count = cs->count.apply_scale (new_count, orig_count);
4872 processed_edges.add (cs);
4874 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4875 cs->count = cs->count.apply_scale (new_count, orig_count);
4877 i++;
4880 /* There are still going to be edges to ORIG_NODE that have one or more
4881 clones coming from another node clone in SELF_GEN_CLONES and which we
4882 scaled by the same amount, which means that the total incoming sum of
4883 counts to ORIG_NODE will be too high, scale such edges back. */
4884 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4886 if (cs->callee->ultimate_alias_target () == orig_node)
4888 unsigned den = 0;
4889 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4890 if (e->callee->ultimate_alias_target () == orig_node
4891 && processed_edges.contains (e))
4892 den++;
4893 if (den > 0)
4894 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4895 if (e->callee->ultimate_alias_target () == orig_node
4896 && processed_edges.contains (e))
4897 e->count /= den;
4901 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4902 along self-recursive edges are likely to have fairly low count and so
4903 edges from them to nodes in the self_gen_clones do not correspond to the
4904 artificially distributed count of the nodes, the total sum of incoming
4905 edges to some clones might be too low. Detect this situation and correct
4906 it. */
4907 for (cgraph_node *n : self_gen_clones)
4909 if (!(n->count.ipa () > profile_count::zero ()))
4910 continue;
4912 desc_incoming_count_struct desc;
4913 desc.orig = orig_node;
4914 desc.processed_edges = &processed_edges;
4915 desc.count = profile_count::zero ();
4916 desc.unproc_orig_rec_edges = 0;
4917 analyze_clone_icoming_counts (n, &desc);
4919 if (n->count.differs_from_p (desc.count))
4921 if (n->count > desc.count
4922 && desc.unproc_orig_rec_edges > 0)
4924 desc.count = n->count - desc.count;
4925 desc.count = desc.count /= desc.unproc_orig_rec_edges;
4926 adjust_clone_incoming_counts (n, &desc);
4928 else if (dump_file)
4929 fprintf (dump_file,
4930 " Unable to fix up incoming counts for %s.\n",
4931 n->dump_name ());
4935 if (dump_file)
4936 for (cgraph_node *n : self_gen_clones)
4937 dump_profile_updates (n, n != orig_node);
4938 return;
4941 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4942 their profile information to reflect this. This function should not be used
4943 for clones generated for arithmetic pass-through jump functions on a
4944 self-recursive call graph edge, that situation is handled by
4945 update_counts_for_self_gen_clones. */
4947 static void
4948 update_profiling_info (struct cgraph_node *orig_node,
4949 struct cgraph_node *new_node)
4951 struct caller_statistics stats;
4952 profile_count new_sum;
4953 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4955 if (!(orig_node_count > profile_count::zero ()))
4956 return;
4958 if (dump_file)
4960 fprintf (dump_file, " Updating profile from original count: ");
4961 orig_node_count.dump (dump_file);
4962 fprintf (dump_file, "\n");
4965 init_caller_stats (&stats, new_node);
4966 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4967 false);
4968 new_sum = stats.count_sum;
4970 if (new_sum > orig_node_count)
4972 /* TODO: Perhaps this should be gcc_unreachable ()? */
4973 remainder = profile_count::zero ().guessed_local ();
4975 else if (stats.rec_count_sum.nonzero_p ())
4977 int new_nonrec_calls = stats.n_nonrec_calls;
4978 /* There are self-recursive edges which are likely to bring in the
4979 majority of calls but which we must divide in between the original and
4980 new node. */
4981 init_caller_stats (&stats, orig_node);
4982 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4983 &stats, false);
4984 int orig_nonrec_calls = stats.n_nonrec_calls;
4985 profile_count orig_nonrec_call_count = stats.count_sum;
4987 if (orig_node->local)
4989 if (!orig_nonrec_call_count.nonzero_p ())
4991 if (dump_file)
4992 fprintf (dump_file, " The original is local and the only "
4993 "incoming edges from non-dead callers with nonzero "
4994 "counts are self-recursive, assuming it is cold.\n");
4995 /* The NEW_NODE count and counts of all its outgoing edges
4996 are still unmodified copies of ORIG_NODE's. Just clear
4997 the latter and bail out. */
4998 profile_count zero;
4999 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
5000 zero = profile_count::zero ().guessed_local ();
5001 else
5002 zero = profile_count::adjusted_zero ();
5003 orig_node->count = zero;
5004 for (cgraph_edge *cs = orig_node->callees;
5006 cs = cs->next_callee)
5007 cs->count = zero;
5008 for (cgraph_edge *cs = orig_node->indirect_calls;
5010 cs = cs->next_callee)
5011 cs->count = zero;
5012 return;
5015 else
5017 /* Let's behave as if there was another caller that accounts for all
5018 the calls that were either indirect or from other compilation
5019 units. */
5020 orig_nonrec_calls++;
5021 profile_count pretend_caller_count
5022 = (orig_node_count - new_sum - orig_nonrec_call_count
5023 - stats.rec_count_sum);
5024 orig_nonrec_call_count += pretend_caller_count;
5027 /* Divide all "unexplained" counts roughly proportionally to sums of
5028 counts of non-recursive calls.
5030 We put rather arbitrary limits on how many counts we claim because the
5031 number of non-self-recursive incoming count is only a rough guideline
5032 and there are cases (such as mcf) where using it blindly just takes
5033 too many. And if lattices are considered in the opposite order we
5034 could also take too few. */
5035 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
5037 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
5038 profile_count new_part
5039 = MAX(MIN (unexp.apply_scale (new_sum,
5040 new_sum + orig_nonrec_call_count),
5041 unexp.apply_scale (limit_den - 1, limit_den)),
5042 unexp.apply_scale (new_nonrec_calls, limit_den));
5043 if (dump_file)
5045 fprintf (dump_file, " Claiming ");
5046 new_part.dump (dump_file);
5047 fprintf (dump_file, " of unexplained ");
5048 unexp.dump (dump_file);
5049 fprintf (dump_file, " counts because of self-recursive "
5050 "calls\n");
5052 new_sum += new_part;
5053 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5054 orig_node);
5056 else
5057 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
5058 orig_node);
5060 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
5061 new_node->count = new_sum;
5062 orig_node->count = remainder;
5064 profile_count orig_new_node_count = orig_node_count;
5065 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
5066 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
5067 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5068 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
5069 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
5071 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
5072 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
5073 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5074 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
5075 cs->count = cs->count.apply_scale (remainder, orig_node_count);
5077 if (dump_file)
5079 dump_profile_updates (new_node, true);
5080 dump_profile_updates (orig_node, false);
5084 /* Update the respective profile of specialized NEW_NODE and the original
5085 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
5086 have been redirected to the specialized version. */
5088 static void
5089 update_specialized_profile (struct cgraph_node *new_node,
5090 struct cgraph_node *orig_node,
5091 profile_count redirected_sum)
5093 struct cgraph_edge *cs;
5094 profile_count new_node_count, orig_node_count = orig_node->count;
5096 if (dump_file)
5098 fprintf (dump_file, " the sum of counts of redirected edges is ");
5099 redirected_sum.dump (dump_file);
5100 fprintf (dump_file, "\n");
5102 if (!(orig_node_count > profile_count::zero ()))
5103 return;
5105 gcc_assert (orig_node_count >= redirected_sum);
5107 new_node_count = new_node->count;
5108 new_node->count += redirected_sum;
5109 orig_node->count -= redirected_sum;
5111 for (cs = new_node->callees; cs; cs = cs->next_callee)
5112 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
5114 for (cs = orig_node->callees; cs; cs = cs->next_callee)
5116 profile_count dec = cs->count.apply_scale (redirected_sum,
5117 orig_node_count);
5118 cs->count -= dec;
5121 if (dump_file)
5123 dump_profile_updates (new_node, true);
5124 dump_profile_updates (orig_node, false);
5128 static void adjust_references_in_caller (cgraph_edge *cs,
5129 symtab_node *symbol, int index);
5131 /* Simple structure to pass a symbol and index (with same meaning as parameters
5132 of adjust_references_in_caller) through a void* parameter of a
5133 call_for_symbol_thunks_and_aliases callback. */
5134 struct symbol_and_index_together
5136 symtab_node *symbol;
5137 int index;
5140 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
5141 adjust_references_in_caller on edges up in the call-graph, if necessary. */
5142 static bool
5143 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
5145 symbol_and_index_together *pack = (symbol_and_index_together *) data;
5146 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
5147 if (!cs->caller->thunk)
5148 adjust_references_in_caller (cs, pack->symbol, pack->index);
5149 return false;
5152 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
5153 variable which is only dereferenced and which is represented by SYMBOL. See
5154 if we can remove ADDR reference in callers assosiated witht the call. */
5156 static void
5157 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
5159 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5160 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
5161 if (jfunc->type == IPA_JF_CONST)
5163 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
5164 cs->lto_stmt_uid);
5165 if (!to_del)
5166 return;
5167 to_del->remove_reference ();
5168 if (dump_file)
5169 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5170 cs->caller->dump_name (), symbol->dump_name ());
5171 return;
5174 if (jfunc->type != IPA_JF_PASS_THROUGH
5175 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
5176 return;
5178 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5179 cgraph_node *caller = cs->caller;
5180 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5181 /* TODO: This consistency check may be too big and not really
5182 that useful. Consider removing it. */
5183 tree cst;
5184 if (caller_info->ipcp_orig_node)
5185 cst = caller_info->known_csts[fidx];
5186 else
5188 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5189 gcc_assert (lat->is_single_const ());
5190 cst = lat->values->value;
5192 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5193 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5194 == symbol));
5196 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5197 if (cuses == IPA_UNDESCRIBED_USE)
5198 return;
5199 gcc_assert (cuses > 0);
5200 cuses--;
5201 ipa_set_controlled_uses (caller_info, fidx, cuses);
5202 if (cuses)
5203 return;
5205 if (caller_info->ipcp_orig_node)
5207 /* Cloning machinery has created a reference here, we need to either
5208 remove it or change it to a read one. */
5209 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
5210 if (to_del && to_del->use == IPA_REF_ADDR)
5212 to_del->remove_reference ();
5213 if (dump_file)
5214 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5215 cs->caller->dump_name (), symbol->dump_name ());
5216 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5218 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5219 if (dump_file)
5220 fprintf (dump_file,
5221 " ...and replaced it with LOAD one.\n");
5226 symbol_and_index_together pack;
5227 pack.symbol = symbol;
5228 pack.index = fidx;
5229 if (caller->can_change_signature)
5230 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5231 &pack, true);
5235 /* Return true if we would like to remove a parameter from NODE when cloning it
5236 with KNOWN_CSTS scalar constants. */
5238 static bool
5239 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5241 auto_vec<bool, 16> surviving;
5242 bool filled_vec = false;
5243 ipa_node_params *info = ipa_node_params_sum->get (node);
5244 int i, count = ipa_get_param_count (info);
5246 for (i = 0; i < count; i++)
5248 if (!known_csts[i] && ipa_is_param_used (info, i))
5249 continue;
5251 if (!filled_vec)
5253 clone_info *info = clone_info::get (node);
5254 if (!info || !info->param_adjustments)
5255 return true;
5256 info->param_adjustments->get_surviving_params (&surviving);
5257 filled_vec = true;
5259 if (surviving.length() < (unsigned) i && surviving[i])
5260 return true;
5262 return false;
/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
   redirect all edges in CALLERS to it.  The new node takes ownership of
   KNOWN_CSTS and KNOWN_CONTEXTS; CALLERS may be modified (self-recursive
   edges are removed from it before cloning).  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 vec<ipa_argagg_value, va_gc> *aggvals,
			 vec<cgraph_edge *> &callers)
{
  ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  clone_info *cinfo = clone_info::get (node);
  ipa_param_adjustments *old_adjustments = cinfo
					   ? cinfo->param_adjustments : NULL;
  ipa_param_adjustments *new_adjustments;
  /* Only original (non-clone) nodes are ever specialized here.  */
  gcc_assert (!info->ipcp_orig_node);
  gcc_assert (node->can_change_signature
	      || !old_adjustments);

  if (old_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time bing let's only assert it is so.  */
      gcc_assert (old_adjustments->m_always_copy_start == count
		  || old_adjustments->m_always_copy_start < 0);
      int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
      for (i = 0; i < old_adj_count; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  /* Keep a previously adjusted parameter unless we can now drop it:
	     it must be a plain copy whose value is a known constant or which
	     is unused.  */
	  if (!node->can_change_signature
	      || old_adj->op != IPA_PARAM_OP_COPY
	      || (!known_csts[old_adj->base_index]
		  && ipa_is_param_used (info, old_adj->base_index)))
	    {
	      ipa_adjusted_param new_adj = *old_adj;

	      new_adj.prev_clone_adjustment = true;
	      new_adj.prev_clone_index = i;
	      vec_safe_push (new_params, new_adj);
	    }
	}
      bool skip_return = old_adjustments->m_skip_return;
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count,
						skip_return));
    }
  else if (node->can_change_signature
	   && want_remove_some_param_p (node, known_csts))
    {
      /* No previous adjustments: build a fresh set that copies only the
	 parameters we must keep (used and without a known constant).  */
      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_COPY;
      for (i = 0; i < count; i++)
	if (!known_csts[i] && ipa_is_param_used (info, i))
	  {
	    adj.base_index = i;
	    adj.prev_clone_index = i;
	    vec_safe_push (new_params, adj);
	  }
      new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
			 ipa_param_adjustments (new_params, count, false));
    }
  else
    new_adjustments = NULL;

  /* Self-recursive edges must not be passed to create_virtual_clone; pull
     them out and handle their clones separately below.  */
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }
  replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (!t)
	continue;

      gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);

      /* If a parameter with controlled uses is replaced by the address of a
	 variable which is only loaded from, the replacement in the clone
	 should be a load and callers' ADDR references can be adjusted.  */
      bool load_ref = false;
      symtab_node *ref_symbol;
      if (TREE_CODE (t) == ADDR_EXPR)
	{
	  tree base = get_base_address (TREE_OPERAND (t, 0));
	  if (TREE_CODE (base) == VAR_DECL
	      && ipa_get_controlled_uses (info, i) == 0
	      && ipa_get_param_load_dereferenced (info, i)
	      && (ref_symbol = symtab_node::get (base)))
	    {
	      load_ref = true;
	      if (node->can_change_signature)
		for (cgraph_edge *caller : callers)
		  adjust_references_in_caller (caller, ref_symbol, i);
	    }
	}

      ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
      if (replace_map)
	vec_safe_push (replace_trees, replace_map);
    }

  /* Number clones of the same function consecutively.  */
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 new_adjustments, "constprop",
					 suffix_counter);
  suffix_counter++;

  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (const ipa_argagg_value &av : aggvals)
    new_node->maybe_create_reference (av.value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "     the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, "     known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	{
	  fprintf (dump_file, "     Aggregate replacements:");
	  ipa_argagg_value_list avs (aggvals);
	  avs.dump (dump_file);
	}
    }

  /* Record the specialization so later phases can tell the clone from the
     original and can look up the values it was specialized for.  */
  new_info = ipa_node_params_sum->get (new_node);
  new_info->ipcp_orig_node = node;
  new_node->ipcp_clone = true;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
				  aggvals);

  return new_node;
}
5441 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5442 pass-through function to itself when the cgraph_node involved is not an
5443 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5444 no-operation pass-through. */
5446 static bool
5447 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5448 bool simple = true)
5450 enum availability availability;
5451 if (cs->caller == cs->callee->function_symbol (&availability)
5452 && availability > AVAIL_INTERPOSABLE
5453 && jfunc->type == IPA_JF_PASS_THROUGH
5454 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5455 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5456 && ipa_node_params_sum->get (cs->caller)
5457 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5458 return true;
5459 return false;
5462 /* Return true if JFUNC, which describes a part of an aggregate represented or
5463 pointed to by the i-th parameter of call CS, is a pass-through function to
5464 itself when the cgraph_node involved is not an IPA-CP clone.. When
5465 SIMPLE is true, further check if JFUNC is a simple no-operation
5466 pass-through. */
5468 static bool
5469 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5470 const ipa_agg_jf_item *jfunc,
5471 int i, bool simple = true)
5473 enum availability availability;
5474 if (cs->caller == cs->callee->function_symbol (&availability)
5475 && availability > AVAIL_INTERPOSABLE
5476 && jfunc->jftype == IPA_JF_LOAD_AGG
5477 && jfunc->offset == jfunc->value.load_agg.offset
5478 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5479 && jfunc->value.pass_through.formal_id == i
5480 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5481 && ipa_node_params_sum->get (cs->caller)
5482 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5483 return true;
5484 return false;
/* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  A
   slot is filled only when every caller in the subset passes the same
   constant for it.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> &known_csts,
					    const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      /* Skip parameters whose lattice is bottom or which are already
	 known.  */
      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  /* Bail out if the edge carries no usable jump function for this
	     argument (also the first argument when the call goes through a
	     thunk, which can shift it).  */
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides simple pass-through jump function, arithmetic jump
	     function could also introduce argument-direct-pass-through for
	     self-feeding recursive call.  For example,

	       fn (int i)
	       {
	         fn (i & 1);
	       }

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      /* gather_edges_for_value places a non-recursive caller first,
		 so NEWVAL must already hold a candidate here.  */
	      gcc_assert (newval);
	      t = ipa_get_jf_arith_result (
		    ipa_get_jf_pass_through_operation (jump_func),
		    newval,
		    ipa_get_jf_pass_through_operand (jump_func),
		    type);
	    }
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  /* All callers must agree on a single constant; otherwise give up
	     on this parameter.  */
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  The vector is grown lazily, only when a useful context is
   found.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      /* Skip parameters with a bottom context lattice or an already known
	 useful context.  */
      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  /* An edge with no argument information means nothing more can be
	     learned for any parameter; give up entirely.  */
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  /* Meet the contexts coming from all callers; stop early once the
	     meet degrades to a useless context.  */
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "    adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}
/* Push all aggregate values coming along edge CS for parameter number INDEX to
   RES.  If INTERIM is non-NULL, it contains the current interim state of
   collected aggregate values which can be used to compute values passed over
   self-recursive edges.

   This basically one iteration of push_agg_values_from_edge over one
   parameter, which allows for simpler early returns.  */

static void
push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
				     vec<ipa_argagg_value> *res,
				     const ipa_argagg_value_list *interim)
{
  bool agg_values_from_caller = false;
  bool agg_jf_preserved = false;
  unsigned unit_delta = UINT_MAX;
  int src_idx = -1;
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
						index);

  /* Determine whether the aggregate contents come unchanged (or shifted by
     an ancestor offset) from a parameter of the caller, and if so from
     which one and with what unit offset adjustment.  */
  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      agg_values_from_caller = true;
      agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
      src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      unit_delta = 0;
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      agg_values_from_caller = true;
      agg_jf_preserved = true;
      src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
    }

  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
  if (agg_values_from_caller)
    {
      if (caller_info->ipcp_orig_node)
	{
	  /* The caller is itself an IPA-CP clone: take the values from its
	     transformation summary, checked against the lattices of the
	     node it was cloned from.  */
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  ipcp_transformation *ts
	    = ipcp_get_transformation_summary (cs->caller);
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  ipcp_param_lattices *orig_plats
	    = ipa_get_parm_lattices (orig_info, src_idx);
	  if (ts
	      && orig_plats->aggs
	      && (agg_jf_preserved || !orig_plats->aggs_by_ref))
	    {
	      ipa_argagg_value_list src (ts);
	      src.push_adjusted_values (src_idx, index, unit_delta, res);
	      return;
	    }
	}
      else
	{
	  /* Ordinary caller: consult its aggregate lattices directly.  */
	  ipcp_param_lattices *src_plats
	    = ipa_get_parm_lattices (caller_info, src_idx);
	  if (src_plats->aggs
	      && !src_plats->aggs_bottom
	      && (agg_jf_preserved || !src_plats->aggs_by_ref))
	    {
	      /* Over a self-recursive simple pass-through, the values are
		 exactly the interim ones collected so far.  */
	      if (interim && self_recursive_pass_through_p (cs, jfunc, index))
		{
		  interim->push_adjusted_values (src_idx, index, unit_delta,
						 res);
		  return;
		}
	      if (!src_plats->aggs_contain_variable)
		{
		  push_agg_values_from_plats (src_plats, index, unit_delta,
					      res);
		  return;
		}
	    }
	}
    }

  /* Otherwise fall back to the individual items the jump function itself
     describes.  */
  if (!jfunc->agg.items)
    return;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
    {
      tree value, srcvalue;
      /* Besides simple pass-through aggregate jump function, arithmetic
	 aggregate jump function could also bring same aggregate value as
	 parameter passed-in for self-feeding recursive call.  For example,

	   fn (int *i)
	   {
	     int j = *i & 1;
	     fn (&j);
	   }

	 Given that *i is 0, recursive propagation via (*i & 1) also gets 0.  */
      if (interim
	  && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
	  && (srcvalue = interim->get_value(index,
					    agg_jf.offset / BITS_PER_UNIT)))
	value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
					 srcvalue,
					 agg_jf.value.pass_through.operand,
					 agg_jf.type);
      else
	value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
					  &agg_jf);
      if (value)
	{
	  struct ipa_argagg_value iav;
	  iav.value = value;
	  iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
	  iav.index = index;
	  iav.by_ref = jfunc->agg.by_ref;

	  /* Items must arrive in strictly increasing offset order; RES
	     relies on that for its sorted invariant.  */
	  gcc_assert (first
		      || iav.unit_offset > prev_unit_offset);
	  prev_unit_offset = iav.unit_offset;
	  first = false;

	  res->safe_push (iav);
	}
    }
  return;
}
/* Push all aggregate values coming along edge CS to RES.  DEST_INFO is the
   description of ultimate callee of CS or the one it was cloned from (the
   summary where lattices are).  If INTERIM is non-NULL, it contains the
   current interim state of collected aggregate values which can be used to
   compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
   is true) and to skip values which clearly will not be part of intersection
   with INTERIM.  */

static void
push_agg_values_from_edge (struct cgraph_edge *cs,
			   ipa_node_params *dest_info,
			   vec<ipa_argagg_value> *res,
			   const ipa_argagg_value_list *interim,
			   bool optimize_self_recursion)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  if (!args)
    return;

  /* Do not look past the number of actual arguments on this edge.  */
  int count = MIN (ipa_get_param_count (dest_info),
		   ipa_get_cs_argument_count (args));

  unsigned interim_index = 0;
  for (int index = 0; index < count; index++)
    {
      if (interim)
	{
	  /* INTERIM is sorted by parameter index; advance the cursor past
	     entries for lower indices and skip this parameter entirely if
	     INTERIM holds nothing for it (the eventual intersection would be
	     empty anyway).  */
	  while (interim_index < interim->m_elts.size ()
		 && interim->m_elts[interim_index].value
		 && interim->m_elts[interim_index].index < index)
	    interim_index++;
	  if (interim_index >= interim->m_elts.size ()
	      || interim->m_elts[interim_index].index > index)
	    continue;
	}

      ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
      if (!ipa_is_param_used (dest_info, index)
	  || plats->aggs_bottom)
	continue;
      /* Pass INTERIM down only when self-recursion optimization is
	 requested; otherwise it served purely as a filter above.  */
      push_agg_values_for_index_from_edge (cs, index, res,
					   optimize_self_recursion ? interim
					   : NULL);
    }
}
/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  Return nullptr if there are none.  The result is the
   intersection of the values brought by each edge.  */

static struct vec<ipa_argagg_value, va_gc> *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  /* Work with the lattices of the original node if NODE is a clone.  */
  if (dest_info->ipcp_orig_node)
    dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);

  /* gather_edges_for_value puts a non-recursive call into the first element of
     callers if it can.  */
  auto_vec<ipa_argagg_value, 32> interim;
  push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);

  unsigned valid_entries = interim.length ();
  if (!valid_entries)
    return nullptr;

  /* Intersect INTERIM with the values brought by each remaining caller;
     stop as soon as nothing survives.  */
  unsigned caller_count = callers.length();
  for (unsigned i = 1; i < caller_count; i++)
    {
      auto_vec<ipa_argagg_value, 32> last;
      ipa_argagg_value_list avs (&interim);
      push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);

      valid_entries = intersect_argaggs_with (interim, last);
      if (!valid_entries)
	return nullptr;
    }

  /* Copy the surviving (non-NULL) entries into a GC-allocated vector of the
     exact required size.  */
  vec<ipa_argagg_value, va_gc> *res = NULL;
  vec_safe_reserve_exact (res, valid_entries);
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res->quick_push(av);
  gcc_checking_assert (res->length () == valid_entries);
  return res;
}
5862 /* Determine whether CS also brings all scalar values that the NODE is
5863 specialized for. */
5865 static bool
5866 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5867 struct cgraph_node *node)
5869 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5870 int count = ipa_get_param_count (dest_info);
5871 class ipa_node_params *caller_info;
5872 class ipa_edge_args *args;
5873 int i;
5875 caller_info = ipa_node_params_sum->get (cs->caller);
5876 args = ipa_edge_args_sum->get (cs);
5877 for (i = 0; i < count; i++)
5879 struct ipa_jump_func *jump_func;
5880 tree val, t;
5882 val = dest_info->known_csts[i];
5883 if (!val)
5884 continue;
5886 if (i >= ipa_get_cs_argument_count (args))
5887 return false;
5888 jump_func = ipa_get_ith_jump_func (args, i);
5889 t = ipa_value_from_jfunc (caller_info, jump_func,
5890 ipa_get_type (dest_info, i));
5891 if (!t || !values_equal_for_ipcp_p (val, t))
5892 return false;
5894 return true;
5897 /* Determine whether CS also brings all aggregate values that NODE is
5898 specialized for. */
5900 static bool
5901 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5902 struct cgraph_node *node)
5904 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5905 if (!ts || vec_safe_is_empty (ts->m_agg_values))
5906 return true;
5908 const ipa_argagg_value_list existing (ts->m_agg_values);
5909 auto_vec<ipa_argagg_value, 32> edge_values;
5910 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5911 gcc_checking_assert (dest_info->ipcp_orig_node);
5912 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5913 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5914 const ipa_argagg_value_list avl (&edge_values);
5915 return avl.superset_of_p (existing);
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected the special node.
   This function can therefore redirect the final edge in a SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      /* Walk the chain of clones of this edge; any of them may now qualify
	 for redirection.  */
      while (cs)
	{
	  /* Redirect only if the edge brings this value and everything else
	     the clone was specialized for.  */
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      /* Accumulate the profile count moved to the clone so its
		 profile can be updated below.  */
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
5958 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5960 static bool
5961 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5963 ipa_polymorphic_call_context *ctx;
5964 int i;
5966 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5967 if (!ctx->useless_p ())
5968 return true;
5969 return false;
5972 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5974 static vec<ipa_polymorphic_call_context>
5975 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5977 if (known_contexts_useful_p (known_contexts))
5978 return known_contexts.copy ();
5979 else
5980 return vNULL;
5983 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5984 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5985 copy too. */
5987 static void
5988 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5989 vec<tree> *known_csts,
5990 vec<ipa_polymorphic_call_context> *known_contexts,
5991 ipcp_value<tree> *val, int index)
5993 *known_csts = avals->m_known_vals.copy ();
5994 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5995 (*known_csts)[index] = val->value;
5998 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5999 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6000 INDEX. */
6002 static void
6003 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6004 vec<tree> *known_csts,
6005 vec<ipa_polymorphic_call_context> *known_contexts,
6006 ipcp_value<ipa_polymorphic_call_context> *val,
6007 int index)
6009 *known_csts = avals->m_known_vals.copy ();
6010 *known_contexts = avals->m_known_contexts.copy ();
6011 (*known_contexts)[index] = val->value;
6014 /* Return true if OFFSET indicates this was not an aggregate value or there is
6015 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6016 AGGVALS list. */
6018 DEBUG_FUNCTION bool
6019 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
6020 int index, HOST_WIDE_INT offset, tree value)
6022 if (offset == -1)
6023 return true;
6025 const ipa_argagg_value_list avl (aggvals);
6026 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
6027 return v && values_equal_for_ipcp_p (v, value);
6030 /* Return true if offset is minus one because source of a polymorphic context
6031 cannot be an aggregate value. */
6033 DEBUG_FUNCTION bool
6034 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
6035 int , HOST_WIDE_INT offset,
6036 ipa_polymorphic_call_context)
6038 return offset == -1;
/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.  SELF_GEN_CLONES
   is a vector which contains clones created for self-recursive calls with an
   arithmetic pass-through jump function.  Return true if a clone was
   created.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      /* A clone for this value already exists; just try to redirect any
	 newly qualifying callers to it.  */
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertleless, even if they were it would be only a guesswork which we
	 can do now.  The recursive part of the counts can be derived from the
	 count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  /* Evaluate the time/size tradeoff both for the local effects of the value
     and for the effects propagated to callees.  */
  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum, val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  /* Build the vectors of values the clone will be specialized for: the
     context-independent ones plus VAL itself, plus whatever else the chosen
     subset of callers agrees on.  */
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  vec<ipa_argagg_value, va_gc> *aggvals
    = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  /* Profile counts of self-recursion-generated clones are fixed up later,
     all at once, in update_counts_for_self_gen_clones.  */
  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too. */

  return true;
}
/* Decide whether and what specialized clones of NODE should be created.
   Return true if any clone was created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count;i++)
    {
      if (!ipa_is_param_used (info, i))
	continue;

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      /* Consider scalar values only if the parameter is not already known
	 context-independently.  */
      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    {
	      /* If some values generated for self-recursive calls with
		 arithmetic jump functions fall outside of the known
		 value_range for the parameter, we can skip them.  VR interface
		 supports this only for integers now.  */
	      if (TREE_CODE (val->value) == INTEGER_CST
		  && !plats->m_value_range.bottom_p ()
		  && !plats->m_value_range.m_vr.contains_p (val->value))
		{
		  /* This can happen also if a constant present in the source
		     code falls outside of the range of parameter's type, so we
		     cannot assert.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " - skipping%s value ",
			       val->self_recursion_generated_p ()
			       ? " self_recursion_generated" : "");
		      print_ipcp_constant_value (dump_file, val->value);
		      fprintf (dump_file, " because it is outside known "
			       "value range.\n");
		    }
		  continue;
		}
	      ret |= decide_about_value (node, i, -1, val, &avals,
					 &self_gen_clones);
	    }
	}

      /* Consider values stored in parts of aggregates passed in or pointed
	 to by this parameter.  */
      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been considered
		   for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val, &avals,
					   &self_gen_clones);
	}

      /* Consider polymorphic contexts only if none is already known
	 context-independently.  */
      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      /* Drop callers which the analysis has already marked dead.  */
      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      vec<ipa_argagg_value, va_gc> *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
6305 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6307 static void
6308 spread_undeadness (struct cgraph_node *node)
6310 struct cgraph_edge *cs;
6312 for (cs = node->callees; cs; cs = cs->next_callee)
6313 if (ipa_edge_within_scc (cs))
6315 struct cgraph_node *callee;
6316 class ipa_node_params *info;
6318 callee = cs->callee->function_symbol (NULL);
6319 info = ipa_node_params_sum->get (callee);
6321 if (info && info->node_dead)
6323 info->node_dead = 0;
6324 spread_undeadness (callee);
6329 /* Return true if NODE has a caller from outside of its SCC that is not
6330 dead. Worker callback for cgraph_for_node_and_aliases. */
6332 static bool
6333 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6334 void *data ATTRIBUTE_UNUSED)
6336 struct cgraph_edge *cs;
6338 for (cs = node->callers; cs; cs = cs->next_caller)
6339 if (cs->caller->thunk
6340 && cs->caller->call_for_symbol_thunks_and_aliases
6341 (has_undead_caller_from_outside_scc_p, NULL, true))
6342 return true;
6343 else if (!ipa_edge_within_scc (cs))
6345 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6346 if (!caller_info /* Unoptimized caller are like dead ones. */
6347 || !caller_info->node_dead)
6348 return true;
6350 return false;
6354 /* Identify nodes within the same SCC as NODE which are no longer needed
6355 because of new clones and will be removed as unreachable. */
6357 static void
6358 identify_dead_nodes (struct cgraph_node *node)
6360 struct cgraph_node *v;
6361 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6362 if (v->local)
6364 ipa_node_params *info = ipa_node_params_sum->get (v);
6365 if (info
6366 && !v->call_for_symbol_thunks_and_aliases
6367 (has_undead_caller_from_outside_scc_p, NULL, true))
6368 info->node_dead = 1;
6371 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6373 ipa_node_params *info = ipa_node_params_sum->get (v);
6374 if (info && !info->node_dead)
6375 spread_undeadness (v);
6378 if (dump_file && (dump_flags & TDF_DETAILS))
6380 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6381 if (ipa_node_params_sum->get (v)
6382 && ipa_node_params_sum->get (v)->node_dead)
6383 fprintf (dump_file, " Marking node as dead: %s.\n",
6384 v->dump_name ());
6388 /* The decision stage. Iterate over the topological order of call graph nodes
6389 TOPO and make specialized clones if deemed beneficial. */
6391 static void
6392 ipcp_decision_stage (class ipa_topo_info *topo)
6394 int i;
6396 if (dump_file)
6397 fprintf (dump_file, "\nIPA decision stage:\n\n");
6399 for (i = topo->nnodes - 1; i >= 0; i--)
6401 struct cgraph_node *node = topo->order[i];
6402 bool change = false, iterate = true;
6404 while (iterate)
6406 struct cgraph_node *v;
6407 iterate = false;
6408 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6409 if (v->has_gimple_body_p ()
6410 && ipcp_versionable_function_p (v))
6411 iterate |= decide_whether_version_node (v);
6413 change |= iterate;
6415 if (change)
6416 identify_dead_nodes (node);
6420 /* Look up all the bits information that we have discovered and copy it over
6421 to the transformation summary. */
6423 static void
6424 ipcp_store_bits_results (void)
6426 cgraph_node *node;
6428 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6430 ipa_node_params *info = ipa_node_params_sum->get (node);
6431 bool dumped_sth = false;
6432 bool found_useful_result = false;
6434 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6436 if (dump_file)
6437 fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
6438 "; -fipa-bit-cp: disabled.\n",
6439 node->dump_name ());
6440 continue;
6443 if (info->ipcp_orig_node)
6444 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6445 if (!info->lattices)
6446 /* Newly expanded artificial thunks do not have lattices. */
6447 continue;
6449 unsigned count = ipa_get_param_count (info);
6450 for (unsigned i = 0; i < count; i++)
6452 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6453 if (plats->bits_lattice.constant_p ())
6455 found_useful_result = true;
6456 break;
6460 if (!found_useful_result)
6461 continue;
6463 ipcp_transformation_initialize ();
6464 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6465 vec_safe_reserve_exact (ts->bits, count);
6467 for (unsigned i = 0; i < count; i++)
6469 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6470 ipa_bits *jfbits;
6472 if (plats->bits_lattice.constant_p ())
6474 jfbits
6475 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6476 plats->bits_lattice.get_mask ());
6477 if (!dbg_cnt (ipa_cp_bits))
6478 jfbits = NULL;
6480 else
6481 jfbits = NULL;
6483 ts->bits->quick_push (jfbits);
6484 if (!dump_file || !jfbits)
6485 continue;
6486 if (!dumped_sth)
6488 fprintf (dump_file, "Propagated bits info for function %s:\n",
6489 node->dump_name ());
6490 dumped_sth = true;
6492 fprintf (dump_file, " param %i: value = ", i);
6493 print_hex (jfbits->value, dump_file);
6494 fprintf (dump_file, ", mask = ");
6495 print_hex (jfbits->mask, dump_file);
6496 fprintf (dump_file, "\n");
6501 /* Look up all VR information that we have discovered and copy it over
6502 to the transformation summary. */
6504 static void
6505 ipcp_store_vr_results (void)
6507 cgraph_node *node;
6509 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6511 ipa_node_params *info = ipa_node_params_sum->get (node);
6512 bool found_useful_result = false;
6514 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6516 if (dump_file)
6517 fprintf (dump_file, "Not considering %s for VR discovery "
6518 "and propagate; -fipa-ipa-vrp: disabled.\n",
6519 node->dump_name ());
6520 continue;
6523 if (info->ipcp_orig_node)
6524 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6525 if (!info->lattices)
6526 /* Newly expanded artificial thunks do not have lattices. */
6527 continue;
6529 unsigned count = ipa_get_param_count (info);
6530 for (unsigned i = 0; i < count; i++)
6532 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6533 if (!plats->m_value_range.bottom_p ()
6534 && !plats->m_value_range.top_p ())
6536 found_useful_result = true;
6537 break;
6540 if (!found_useful_result)
6541 continue;
6543 ipcp_transformation_initialize ();
6544 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6545 vec_safe_reserve_exact (ts->m_vr, count);
6547 for (unsigned i = 0; i < count; i++)
6549 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6550 ipa_vr vr;
6552 if (!plats->m_value_range.bottom_p ()
6553 && !plats->m_value_range.top_p ()
6554 && dbg_cnt (ipa_cp_vr))
6556 vr.known = true;
6557 vr.type = plats->m_value_range.m_vr.kind ();
6558 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6559 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6561 else
6563 vr.known = false;
6564 vr.type = VR_VARYING;
6565 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6567 ts->m_vr->quick_push (vr);
6572 /* The IPCP driver. */
6574 static unsigned int
6575 ipcp_driver (void)
6577 class ipa_topo_info topo;
6579 if (edge_clone_summaries == NULL)
6580 edge_clone_summaries = new edge_clone_summary_t (symtab);
6582 ipa_check_create_node_params ();
6583 ipa_check_create_edge_args ();
6584 clone_num_suffixes = new hash_map<const char *, unsigned>;
6586 if (dump_file)
6588 fprintf (dump_file, "\nIPA structures before propagation:\n");
6589 if (dump_flags & TDF_DETAILS)
6590 ipa_print_all_params (dump_file);
6591 ipa_print_all_jump_functions (dump_file);
6594 /* Topological sort. */
6595 build_toporder_info (&topo);
6596 /* Do the interprocedural propagation. */
6597 ipcp_propagate_stage (&topo);
6598 /* Decide what constant propagation and cloning should be performed. */
6599 ipcp_decision_stage (&topo);
6600 /* Store results of bits propagation. */
6601 ipcp_store_bits_results ();
6602 /* Store results of value range propagation. */
6603 ipcp_store_vr_results ();
6605 /* Free all IPCP structures. */
6606 delete clone_num_suffixes;
6607 free_toporder_info (&topo);
6608 delete edge_clone_summaries;
6609 edge_clone_summaries = NULL;
6610 ipa_free_all_structures_after_ipa_cp ();
6611 if (dump_file)
6612 fprintf (dump_file, "\nIPA constant propagation end\n");
6613 return 0;
6616 /* Initialization and computation of IPCP data structures. This is the initial
6617 intraprocedural analysis of functions, which gathers information to be
6618 propagated later on. */
6620 static void
6621 ipcp_generate_summary (void)
6623 struct cgraph_node *node;
6625 if (dump_file)
6626 fprintf (dump_file, "\nIPA constant propagation start:\n");
6627 ipa_register_cgraph_hooks ();
6629 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6630 ipa_analyze_node (node);
6633 namespace {
/* Pass metadata for the IPA-CP pass; see struct pass_data in tree-pass.h
   for the meaning of the individual fields.  */
6635 const pass_data pass_data_ipa_cp =
6637 IPA_PASS, /* type */
6638 "cp", /* name */
6639 OPTGROUP_NONE, /* optinfo_flags */
6640 TV_IPA_CONSTANT_PROP, /* tv_id */
6641 0, /* properties_required */
6642 0, /* properties_provided */
6643 0, /* properties_destroyed */
6644 0, /* todo_flags_start */
/* After the pass, dump the symbol table and remove functions that have
   become unreachable (e.g. fully-cloned originals).  */
6645 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
/* Pass object wiring the IPA-CP callbacks (summary generation, streaming
   of transformation summaries and per-function transformation) into the
   pass manager.  */
6648 class pass_ipa_cp : public ipa_opt_pass_d
6650 public:
6651 pass_ipa_cp (gcc::context *ctxt)
6652 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6653 ipcp_generate_summary, /* generate_summary */
6654 NULL, /* write_summary */
6655 NULL, /* read_summary */
6656 ipcp_write_transformation_summaries, /*
6657 write_optimization_summary */
6658 ipcp_read_transformation_summaries, /*
6659 read_optimization_summary */
6660 NULL, /* stmt_fixup */
6661 0, /* function_transform_todo_flags_start */
6662 ipcp_transform_function, /* function_transform */
6663 NULL) /* variable_transform */
6666 /* opt_pass methods: */
/* Gate: run when -fipa-cp is enabled while optimizing, and always during
   LTO (presumably so streamed transformation summaries are applied —
   NOTE(review): confirm the in_lto_p rationale).  */
6667 bool gate (function *) final override
6669 /* FIXME: We should remove the optimize check after we ensure we never run
6670 IPA passes when not optimizing. */
6671 return (flag_ipa_cp && optimize) || in_lto_p;
6674 unsigned int execute (function *) final override { return ipcp_driver (); }
6676 }; // class pass_ipa_cp
6678 } // anon namespace
6680 ipa_opt_pass_d *
6681 make_pass_ipa_cp (gcc::context *ctxt)
6683 return new pass_ipa_cp (ctxt);
6686 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6687 within the same process. For use by toplev::finalize. */
6689 void
6690 ipa_cp_cc_finalize (void)
/* Reset the file-scope accumulators used by the propagation/decision
   stages back to their initial values.  */
6692 base_count = profile_count::uninitialized ();
6693 overall_size = 0;
6694 orig_overall_size = 0;
/* Drop any remaining transformation summaries.  */
6695 ipcp_free_transformation_sum ();