/* Interprocedural constant propagation
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code-size
      cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   ======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
     argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.
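
   As an illustrative example (the functions below are hypothetical, not part
   of GCC), consider:

     int bar (int a, int b, int c);

     int foo (int x)
     {
       return bar (x + 1, 7, rand ());
     }

   Here the call to bar gets a pass-through jump function with a PLUS_EXPR
   operation for its first argument, a constant jump function (7) for the
   second one and an unknown jump function for the third.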
   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the one in the original Callahan et al.
   paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */
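
/* To illustrate the overall effect on a hypothetical example (a sketch, not
   actual output of the pass): if every call site of

     static int mult (int m, int n) { return m * n; }

   passes the constant 8 as N, IPA-CP can replace the function with a clone
   (named with a ".constprop" suffix) equivalent to

     static int mult_8 (int m) { return m * 8; }

   and redirect all calls to it, allowing the multiplication to be
   strength-reduced to a shift in the clone.  */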
#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "sreal.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA-CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic call context of the Ith
   formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
      && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
    return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
	   || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			       DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
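
/* An illustrative consequence of the above: two ADDR_EXPRs taking the
   addresses of two distinct constant-pool decls that both contain the string
   "abc" compare equal, because what matters to IPA-CP is the content of the
   constant, not the identity of the decl holding it.  */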

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, " ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, "         ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, "        virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, "        AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, "        AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that
   self-recursive calls from ITSELF are counted separately (in rec_count_sum
   rather than count_sum).  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
    constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
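
/* In the value/mask representation used above, a set bit in the mask means
   the corresponding value bit is unknown.  For example (illustration only),
   the INTEGER_CST 12 is encoded as <value = 12 (0b1100), mask = 0>, i.e. all
   bits known, while any other operand is encoded as <value = 0, mask = -1>,
   i.e. no bits known.  */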

/* Meet operation, similar to ccp_lattice_meet: we XOR the old and the new
   values and, wherever they differ at the same bit position, drop that bit to
   varying.  Return true if the mask changed.  This function assumes that the
   lattice value is in CONSTANT state.  If DROP_ALL_ONES, mask out any known
   bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
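
/* A worked example of the meet above (illustration only): meeting the
   constant 12 <value = 0b1100, mask = 0> with the constant 8
   <value = 0b1000, mask = 0> XORs the two values to 0b0100 and folds that
   into the mask, yielding <value = 0b1000, mask = 0b0100>: bit 2, on which
   the two constants disagree, becomes unknown while all other bits remain
   known.  */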

/* Meet the bits lattice with operand described by <VALUE, MASK, PRECISION>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand) if
   CODE is a binary operation or of bit_value_unop (other) if CODE is a unary
   one.  When CODE is NOP_EXPR, no adjustment is required.  If DROP_ALL_ONES,
   mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}

/* Dump the contents of the list to FILE.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
	       av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
	fprintf (f, "(by_ref)");
      if (av.killed)
	fprintf (f, "(killed)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr.  */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}

/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
			[] (const ipa_argagg_value &elt,
			    const ipa_argagg_value &val)
			{
			  if (elt.index < val.index)
			    return true;
			  if (elt.index > val.index)
			    return false;
			  if (elt.unit_offset < val.unit_offset)
			    return true;
			  return false;
			});

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
		  || prev_index < av.index
		  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
	  && av.unit_offset == unit_offset)
	slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}

/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
			[] (const ipa_argagg_value &elt, unsigned idx)
			{
			  return elt.index < idx;
			});
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
				  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}

/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
	     && (m_elts[j].index < other_index
		 || (m_elts[j].index == other_index
		     && m_elts[j].unit_offset < other_offset)))
	j++;

      if (j >= m_elts.size ()
	  || m_elts[j].index != other_index
	  || m_elts[j].unit_offset != other_offset
	  || m_elts[j].by_ref != other.m_elts[i].by_ref
	  || !m_elts[j].value
	  || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
	return false;
    }
  return true;
}

/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
					     unsigned dest_index,
					     unsigned unit_delta,
					     vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
	return;
      if (av->index == src_index
	  && (av->unit_offset >= unit_delta)
	  && av->value)
	{
	  ipa_argagg_value new_av;
	  gcc_checking_assert (av->value);
	  new_av.value = av->value;
	  new_av.unit_offset = av->unit_offset - unit_delta;
	  new_av.index = dest_index;
	  new_av.by_ref = av->by_ref;
	  gcc_assert (!av->killed);
	  new_av.killed = false;

	  /* Quick check that the offsets we push are indeed increasing.  */
	  gcc_assert (first
		      || new_av.unit_offset > prev_unit_offset);
	  prev_unit_offset = new_av.unit_offset;
	  first = false;

	  res->safe_push (new_av);
	}
    }
}

/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
			    unsigned unit_delta,
			    vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
	&& (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
	ipa_argagg_value iav;
	iav.value = aglat->values->value;
	iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
	iav.index = dest_index;
	iav.by_ref = plats->aggs_by_ref;
	iav.killed = false;

	gcc_assert (first
		    || iav.unit_offset > prev_unit_offset);
	prev_unit_offset = iav.unit_offset;
	first = false;

	pushed_sth = true;
	res->safe_push (iav);
      }
  return pushed_sth;
}

/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
			const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
	continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
	     && (other[j].index < this_index
		 || (other[j].index == this_index
		     && other[j].unit_offset < this_offset)))
	j++;

      if (j >= other.length ())
	{
	  elts[i].value = NULL_TREE;
	  continue;
	}

      if (other[j].index == this_index
	  && other[j].unit_offset == this_offset
	  && other[j].by_ref == elts[i].by_ref
	  && other[j].value
	  && values_equal_for_ipcp_p (other[j].value, elts[i].value))
	valid_entries++;
      else
	elts[i].value = NULL_TREE;
    }
  return valid_entries;
}
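
/* An illustrative run of the intersection above, with entries written as
   index[unit_offset]=value:

     ELTS  = { 0[0]=1, 0[4]=2, 1[0]=3 }
     OTHER = { 0[4]=2, 1[0]=9 }

   The walk turns 0[0] into NULL_TREE (no counterpart in OTHER), keeps 0[4]=2
   (exact match), turns 1[0] into NULL_TREE (values differ) and returns 1.  */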

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
	info->node_calling_single_call = true;
	return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, " Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   " The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
	  || !ipa_get_type (info, i)
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.init (type);
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init (type);
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the second operand for a binary operation.
   RES_TYPE is the type of the parameter to which the result is passed.
   Return NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
	return input;
      else
	return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
				  input,
				  ipa_get_jf_pass_through_operand (jfunc),
				  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}
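
/* For instance (a hypothetical sketch): an ancestor jump function with a
   96-bit offset applied to the invariant address &some_struct yields the
   equivalent of &MEM[&some_struct + 12B], i.e. the address 12 bytes into the
   object; applied to a literal 0 with the keep_null flag set, it yields 0
   again.  */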

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (info->lattices.is_empty ()
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known polymorphic context,
   given that INFO describes the caller node or the one it is inlined to, CS is
   the call graph edge corresponding to JFUNC and CSIDX the index of the
   described parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (info->lattices.is_empty ()
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range that is not VARYING nor UNDEFINED.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const vrange &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  if (!irange::supports_p (dst_type) || !irange::supports_p (src_type))
    return false;

  range_op_handler handler (operation);
  if (!handler)
    return false;

  Value_Range varying (dst_type);
  varying.set_varying (dst_type);

  return (handler.operand_check_p (dst_type, src_type, dst_type)
	  && handler.fold_range (dst_vr, dst_type, src_vr, varying)
	  && !dst_vr.varying_p ()
	  && !dst_vr.undefined_p ());
}

/* Same as above, but the SRC_VR argument is an IPA_VR which must
   first be extracted onto a vrange.  */

static bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const ipa_vr &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  Value_Range tmp;
  src_vr.get_vrange (tmp);
  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
					    dst_type, src_type);
}

/* Determine the range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE is the type of the parameter.  */

void
ipa_value_range_from_jfunc (vrange &vr,
			    ipa_node_params *info, cgraph_edge *cs,
			    ipa_jump_func *jfunc, tree parm_type)
{
  vr.set_undefined ();

  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (vr,
				       *jfunc->m_vr,
				       NOP_EXPR, parm_type,
				       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
	= ipcp_get_transformation_summary (cs->caller->inlined_to
					   ? cs->caller->inlined_to
					   : cs->caller);
      if (!sum || !sum->m_vr)
	return;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known_p ())
	return;
      tree vr_type = ipa_get_type (info, idx);
      Value_Range srcvr;
      (*sum->m_vr)[idx].get_vrange (srcvr);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
	  Value_Range res (vr_type);

	  if (ipa_vr_operation_and_type_effects (res,
						 srcvr,
						 operation, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
      else
	{
	  Value_Range op_res (vr_type);
	  Value_Range res (vr_type);
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  Value_Range op_vr (vr_type);
	  range_op_handler handler (operation);

	  ipa_range_set_and_normalize (op_vr, op);

	  if (!handler
	      || !op_res.supports_type_p (vr_type)
	      || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
	    op_res.set_varying (vr_type);

	  if (ipa_vr_operation_and_type_effects (res,
						 op_res,
						 NOP_EXPR, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
    }
}

/* Determine whether ITEM, a jump function for an aggregate part, evaluates to
   a single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

tree
ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
			  const ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0
      || item->jftype == IPA_JF_UNKNOWN
      || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
	value = info->known_csts[src_idx];
      else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
	{
	  ipa_argagg_value_list avl (ts);
	  value = avl.get_value (src_idx,
				 item->value.load_agg.offset / BITS_PER_UNIT,
				 item->value.load_agg.by_ref);
	}
    }
  else if (!info->lattices.is_empty ())
    {
      class ipcp_param_lattices *src_plats
	= ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
	{
	  struct ipcp_lattice<tree> *lat = &src_plats->itself;

	  if (!lat->is_single_const ())
	    return NULL_TREE;

	  value = lat->values->value;
	}
      else if (src_plats->aggs
	       && !src_plats->aggs_bottom
	       && !src_plats->aggs_contain_variable
	       && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
	{
	  struct ipcp_agg_lattice *aglat;

	  for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
	    {
	      if (aglat->offset > item->value.load_agg.offset)
		break;

	      if (aglat->offset == item->value.load_agg.offset)
		{
		  if (aglat->is_single_const ())
		    value = aglat->values->value;
		  break;
		}
	    }
	}
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
	return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
				  value,
				  item->value.pass_through.operand,
				  item->type);
}

/* Process all items in AGG_JFUNC relative to caller (or the node the original
   caller is inlined to) NODE which described by INFO and push the results to
   RES as describing values passed in parameter DST_INDEX.  */

void
ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
				ipa_agg_jump_function *agg_jfunc,
				unsigned dst_index,
				vec<ipa_argagg_value> *res)
{
  unsigned prev_unit_offset = 0;
  bool first = true;

  for (const ipa_agg_jf_item &item : agg_jfunc->items)
    {
      tree value = ipa_agg_value_from_jfunc (info, node, &item);
      if (!value)
	continue;

      ipa_argagg_value iav;
      iav.value = value;
      iav.unit_offset = item.offset / BITS_PER_UNIT;
      iav.index = dst_index;
      iav.by_ref = agg_jfunc->by_ref;
      iav.killed = 0;

      gcc_assert (first
		  || iav.unit_offset > prev_unit_offset);
      prev_unit_offset = iav.unit_offset;
      first = false;

      res->safe_push (iav);
    }
}

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

DEBUG_FUNCTION void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
	  || !opt_for_fn (node->decl, optimize))
	continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}

/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset of the
   source within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}
1954 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1955 SOURCE and clear all other fields. */
1957 static ipcp_value<tree> *
1958 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1960 ipcp_value<tree> *val;
1962 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1963 val->value = cst;
1964 val->self_recursion_generated_level = same_lat_gen_level;
1965 return val;
1968 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1969 value to CTX and clear all other fields. */
1971 static ipcp_value<ipa_polymorphic_call_context> *
1972 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1973 unsigned same_lat_gen_level)
1975 ipcp_value<ipa_polymorphic_call_context> *val;
1977 val = new (ipcp_poly_ctx_values_pool.allocate ())
1978 ipcp_value<ipa_polymorphic_call_context>();
1979 val->value = ctx;
1980 val->self_recursion_generated_level = same_lat_gen_level;
1981 return val;
1984 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1985 SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
1986 meaning. OFFSET -1 means the source is scalar and not a part of an
1987 aggregate. If non-NULL, VAL_P records address of existing or newly added
1988 ipcp_value.
1990 If the value is generated for a self-recursive call as a result of an
1991 arithmetic pass-through jump-function acting on a value in the same lattice,
1992 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1993 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1995 template <typename valtype>
1996 bool
1997 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1998 ipcp_value<valtype> *src_val,
1999 int src_idx, HOST_WIDE_INT offset,
2000 ipcp_value<valtype> **val_p,
2001 unsigned same_lat_gen_level)
2003 ipcp_value<valtype> *val, *last_val = NULL;
2005 if (val_p)
2006 *val_p = NULL;
2008 if (bottom)
2009 return false;
2011 for (val = values; val; last_val = val, val = val->next)
2012 if (values_equal_for_ipcp_p (val->value, newval))
2014 if (val_p)
2015 *val_p = val;
2017 if (val->self_recursion_generated_level < same_lat_gen_level)
2018 val->self_recursion_generated_level = same_lat_gen_level;
2020 if (ipa_edge_within_scc (cs))
2022 ipcp_value_source<valtype> *s;
2023 for (s = val->sources; s; s = s->next)
2024 if (s->cs == cs && s->val == src_val)
2025 break;
2026 if (s)
2027 return false;
2030 val->add_source (cs, src_val, src_idx, offset);
2031 return false;
2034 if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2035 param_ipa_cp_value_list_size))
2037 /* We can only free sources, not the values themselves, because sources
2038 of other values in this SCC might point to them. */
2039 for (val = values; val; val = val->next)
2041 while (val->sources)
2043 ipcp_value_source<valtype> *src = val->sources;
2044 val->sources = src->next;
2045 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2048 values = NULL;
2049 return set_to_bottom ();
2052 values_count++;
2053 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2054 val->add_source (cs, src_val, src_idx, offset);
2055 val->next = NULL;
2057 /* Add the new value to the end of the value list, which can reduce the number
2058 of iterations of the propagation stage for recursive functions. */
2059 if (last_val)
2060 last_val->next = val;
2061 else
2062 values = val;
2064 if (val_p)
2065 *val_p = val;
2067 return true;
2070 /* A helper function that returns the result of the operation specified by
2071 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected type
2072 of the value of SRC_VAL. If the operation is binary, OPND2 is a constant
2073 value acting as its second operand. If non-NULL, RES_TYPE is the expected
2074 type of the result. */
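/* A hedged illustration (hypothetical operands, not from this file): with
   OPCODE == PLUS_EXPR, SRC_VAL holding the integer constant 3 and OPND2
   being the integer constant 4, ipa_get_jf_arith_result folds the
   expression and yields the constant 7 converted to RES_TYPE.  */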
2076 static tree
2077 get_val_across_arith_op (enum tree_code opcode,
2078 tree opnd1_type,
2079 tree opnd2,
2080 ipcp_value<tree> *src_val,
2081 tree res_type)
2083 tree opnd1 = src_val->value;
2085 /* Skip source values that are incompatible with the specified type. */
2086 if (opnd1_type
2087 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2088 return NULL_TREE;
2090 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2093 /* Propagate values through an arithmetic transformation described by a jump
2094 function associated with edge CS, taking values from SRC_LAT and putting
2095 them into DEST_LAT. OPND1_TYPE is the expected type of the values in SRC_LAT.
2096 OPND2 is a constant value if the transformation is a binary operation.
2097 SRC_OFFSET specifies the offset within an aggregate if SRC_LAT describes a
2098 lattice of a part of the aggregate. SRC_IDX is the index of the source
2099 parameter. RES_TYPE is the type of the result being propagated. Return
2100 true if DEST_LAT changed. */
2102 static bool
2103 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2104 enum tree_code opcode,
2105 tree opnd1_type,
2106 tree opnd2,
2107 ipcp_lattice<tree> *src_lat,
2108 ipcp_lattice<tree> *dest_lat,
2109 HOST_WIDE_INT src_offset,
2110 int src_idx,
2111 tree res_type)
2113 ipcp_value<tree> *src_val;
2114 bool ret = false;
2116 /* Due to circular dependencies, propagating within an SCC through arithmetic
2117 transformations would create an infinite number of values. But for a
2118 self-feeding recursive function, we can allow a limited number of propagation
2119 rounds, which enables a simple kind of recursive function versioning.
2120 In other scenarios, we simply make the lattices bottom. */
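/* An illustrative sketch (hypothetical function, not from this file):

     int f (int x)
     {
       ...
       f (x + 1);
       ...
     }

   With a known outside call such as f (0), the PLUS_EXPR pass-through
   jump function on the self-recursive edge would keep producing 1, 2,
   3, ... without bound; the code below bounds the length of the chain of
   generated values by param_ipa_cp_max_recursive_depth per seed value.  */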
2121 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2123 int i;
2125 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2126 param_ipa_cp_max_recursive_depth);
2127 if (src_lat != dest_lat || max_recursive_depth < 1)
2128 return dest_lat->set_contains_variable ();
2130 /* No benefit if the probability of recursive execution is low. */
2131 if (cs->sreal_frequency () * 100
2132 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2133 param_ipa_cp_min_recursive_probability))
2134 return dest_lat->set_contains_variable ();
2136 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2138 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2140 /* We currently do not use self-recursively generated values as propagation
2141 sources. This is conservative but avoids an explosion of the lattice's
2142 value space, especially when one recursive function calls another
2143 recursive one. */
2144 if (src_val->self_recursion_generated_p ())
2146 ipcp_value_source<tree> *s;
2148 /* If the lattice has already been propagated for the call site,
2149 no need to do that again. */
2150 for (s = src_val->sources; s; s = s->next)
2151 if (s->cs == cs)
2152 return dest_lat->set_contains_variable ();
2154 else
2155 val_seeds.safe_push (src_val);
2158 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2160 /* Recursively generate a limited number of lattice values. */
2161 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2163 for (int j = 1; j < max_recursive_depth; j++)
2165 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2166 src_val, res_type);
2167 if (!cstval
2168 || !ipacp_value_safe_for_type (res_type, cstval))
2169 break;
2171 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2172 src_offset, &src_val, j);
2173 gcc_checking_assert (src_val);
2176 ret |= dest_lat->set_contains_variable ();
2178 else
2179 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2181 /* Here too we do not use self-recursively generated values as propagation
2182 sources; otherwise the value space of a normal lattice could easily
2183 overflow. */
2184 if (src_val->self_recursion_generated_p ())
2186 ret |= dest_lat->set_contains_variable ();
2187 continue;
2190 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2191 src_val, res_type);
2192 if (cstval
2193 && ipacp_value_safe_for_type (res_type, cstval))
2194 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2195 src_offset);
2196 else
2197 ret |= dest_lat->set_contains_variable ();
2200 return ret;
2203 /* Propagate values through a pass-through jump function JFUNC associated with
2204 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2205 is the index of the source parameter. PARM_TYPE is the type of the
2206 parameter to which the result is passed. */
2208 static bool
2209 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2210 ipcp_lattice<tree> *src_lat,
2211 ipcp_lattice<tree> *dest_lat, int src_idx,
2212 tree parm_type)
2214 return propagate_vals_across_arith_jfunc (cs,
2215 ipa_get_jf_pass_through_operation (jfunc),
2216 NULL_TREE,
2217 ipa_get_jf_pass_through_operand (jfunc),
2218 src_lat, dest_lat, -1, src_idx, parm_type);
2221 /* Propagate values through an ancestor jump function JFUNC associated with
2222 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2223 is the index of the source parameter. */
2225 static bool
2226 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2227 struct ipa_jump_func *jfunc,
2228 ipcp_lattice<tree> *src_lat,
2229 ipcp_lattice<tree> *dest_lat, int src_idx,
2230 tree param_type)
2232 ipcp_value<tree> *src_val;
2233 bool ret = false;
2235 if (ipa_edge_within_scc (cs))
2236 return dest_lat->set_contains_variable ();
2238 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2240 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2242 if (t && ipacp_value_safe_for_type (param_type, t))
2243 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2244 else
2245 ret |= dest_lat->set_contains_variable ();
2248 return ret;
2251 /* Propagate scalar values across jump function JFUNC that is associated with
2252 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2253 parameter to which the result is passed. */
2255 static bool
2256 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2257 struct ipa_jump_func *jfunc,
2258 ipcp_lattice<tree> *dest_lat,
2259 tree param_type)
2261 if (dest_lat->bottom)
2262 return false;
2264 if (jfunc->type == IPA_JF_CONST)
2266 tree val = ipa_get_jf_constant (jfunc);
2267 if (ipacp_value_safe_for_type (param_type, val))
2268 return dest_lat->add_value (val, cs, NULL, 0);
2269 else
2270 return dest_lat->set_contains_variable ();
2272 else if (jfunc->type == IPA_JF_PASS_THROUGH
2273 || jfunc->type == IPA_JF_ANCESTOR)
2275 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2276 ipcp_lattice<tree> *src_lat;
2277 int src_idx;
2278 bool ret;
2280 if (jfunc->type == IPA_JF_PASS_THROUGH)
2281 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2282 else
2283 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2285 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2286 if (src_lat->bottom)
2287 return dest_lat->set_contains_variable ();
2289 /* If we would need to clone the caller and cannot, do not propagate. */
2290 if (!ipcp_versionable_function_p (cs->caller)
2291 && (src_lat->contains_variable
2292 || (src_lat->values_count > 1)))
2293 return dest_lat->set_contains_variable ();
2295 if (jfunc->type == IPA_JF_PASS_THROUGH)
2296 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2297 dest_lat, src_idx,
2298 param_type);
2299 else
2300 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2301 src_idx, param_type);
2303 if (src_lat->contains_variable)
2304 ret |= dest_lat->set_contains_variable ();
2306 return ret;
2309 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2310 use them for indirect inlining); we should propagate them too. */
2311 return dest_lat->set_contains_variable ();
2314 /* Propagate polymorphic call contexts across jump function JFUNC that is
2315 associated with edge CS and describes argument IDX, putting them into DEST_LAT. */
2317 static bool
2318 propagate_context_across_jump_function (cgraph_edge *cs,
2319 ipa_jump_func *jfunc, int idx,
2320 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2322 if (dest_lat->bottom)
2323 return false;
2324 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2325 bool ret = false;
2326 bool added_sth = false;
2327 bool type_preserved = true;
2329 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2330 = ipa_get_ith_polymorhic_call_context (args, idx);
2332 if (edge_ctx_ptr)
2333 edge_ctx = *edge_ctx_ptr;
2335 if (jfunc->type == IPA_JF_PASS_THROUGH
2336 || jfunc->type == IPA_JF_ANCESTOR)
2338 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2339 int src_idx;
2340 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2342 /* TODO: Once we figure out how to propagate speculations, it will
2343 probably be a good idea to switch to speculation if type_preserved is
2344 not set instead of punting. */
2345 if (jfunc->type == IPA_JF_PASS_THROUGH)
2347 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2348 goto prop_fail;
2349 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2350 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2352 else
2354 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2355 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2358 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2359 /* If we would need to clone the caller and cannot, do not propagate. */
2360 if (!ipcp_versionable_function_p (cs->caller)
2361 && (src_lat->contains_variable
2362 || (src_lat->values_count > 1)))
2363 goto prop_fail;
2365 ipcp_value<ipa_polymorphic_call_context> *src_val;
2366 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2368 ipa_polymorphic_call_context cur = src_val->value;
2370 if (!type_preserved)
2371 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2372 if (jfunc->type == IPA_JF_ANCESTOR)
2373 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2374 /* TODO: In cases we know how the context is going to be used,
2375 we can improve the result by passing proper OTR_TYPE. */
2376 cur.combine_with (edge_ctx);
2377 if (!cur.useless_p ())
2379 if (src_lat->contains_variable
2380 && !edge_ctx.equal_to (cur))
2381 ret |= dest_lat->set_contains_variable ();
2382 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2383 added_sth = true;
2388 prop_fail:
2389 if (!added_sth)
2391 if (!edge_ctx.useless_p ())
2392 ret |= dest_lat->add_value (edge_ctx, cs);
2393 else
2394 ret |= dest_lat->set_contains_variable ();
2397 return ret;
2400 /* Propagate bits across jump function JFUNC that is associated with
2401 edge CS and update DEST_LATTICE accordingly. */
2403 bool
2404 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2405 ipa_jump_func *jfunc,
2406 ipcp_bits_lattice *dest_lattice)
2408 if (dest_lattice->bottom_p ())
2409 return false;
2411 enum availability availability;
2412 cgraph_node *callee = cs->callee->function_symbol (&availability);
2413 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2414 tree parm_type = ipa_get_type (callee_info, idx);
2416 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2417 transform for these cases. Similarly, we can have bad type mismatches
2418 with LTO, so avoid doing anything with those too. */
2419 if (!parm_type
2420 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2424 "param %i of %s is NULL or unsuitable for bits propagation\n",
2425 idx, cs->callee->dump_name ());
2427 return dest_lattice->set_to_bottom ();
2430 unsigned precision = TYPE_PRECISION (parm_type);
2431 signop sgn = TYPE_SIGN (parm_type);
2433 if (jfunc->type == IPA_JF_PASS_THROUGH
2434 || jfunc->type == IPA_JF_ANCESTOR)
2436 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2437 tree operand = NULL_TREE;
2438 enum tree_code code;
2439 unsigned src_idx;
2440 bool keep_null = false;
2442 if (jfunc->type == IPA_JF_PASS_THROUGH)
2444 code = ipa_get_jf_pass_through_operation (jfunc);
2445 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2446 if (code != NOP_EXPR)
2447 operand = ipa_get_jf_pass_through_operand (jfunc);
2449 else
2451 code = POINTER_PLUS_EXPR;
2452 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2453 unsigned HOST_WIDE_INT offset
2454 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2455 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2456 operand = build_int_cstu (size_type_node, offset);
2459 class ipcp_param_lattices *src_lats
2460 = ipa_get_parm_lattices (caller_info, src_idx);
2462 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2463 For example, consider:
2464 int f(int x)
2466 g (x & 0xff);
2468 Assume the lattice for x is bottom; we can nevertheless propagate the
2469 known bits of x & 0xff (only the low eight bits can be nonzero), which
2470 get computed during the ccp1 pass and stored in the jump function during the analysis stage. */
2472 if (!src_lats->bits_lattice.bottom_p ())
2474 bool drop_all_ones
2475 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2477 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2478 sgn, code, operand, drop_all_ones);
2482 Value_Range vr (parm_type);
2483 if (jfunc->m_vr)
2485 jfunc->m_vr->get_vrange (vr);
2486 if (!vr.undefined_p () && !vr.varying_p ())
2488 irange &r = as_a <irange> (vr);
2489 irange_bitmask bm = r.get_bitmask ();
2490 widest_int mask
2491 = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2492 widest_int value
2493 = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2494 return dest_lattice->meet_with (value, mask, precision);
2497 return dest_lattice->set_to_bottom ();
2500 /* Propagate the value range across jump function JFUNC that is associated
2501 with edge CS, with the callee parameter being of type PARAM_TYPE, and update
2502 DEST_PLATS accordingly. */
2504 static bool
2505 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2506 class ipcp_param_lattices *dest_plats,
2507 tree param_type)
2509 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2511 if (dest_lat->bottom_p ())
2512 return false;
2514 if (!param_type
2515 || (!INTEGRAL_TYPE_P (param_type)
2516 && !POINTER_TYPE_P (param_type)))
2517 return dest_lat->set_to_bottom ();
2519 if (jfunc->type == IPA_JF_PASS_THROUGH)
2521 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2522 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2523 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2524 class ipcp_param_lattices *src_lats
2525 = ipa_get_parm_lattices (caller_info, src_idx);
2526 tree operand_type = ipa_get_type (caller_info, src_idx);
2528 if (src_lats->m_value_range.bottom_p ())
2529 return dest_lat->set_to_bottom ();
2531 Value_Range vr (operand_type);
2532 if (TREE_CODE_CLASS (operation) == tcc_unary)
2533 ipa_vr_operation_and_type_effects (vr,
2534 src_lats->m_value_range.m_vr,
2535 operation, param_type,
2536 operand_type);
2537 /* A crude way to prevent an unbounded number of value range updates
2538 in SCC components. We should allow a limited number of updates within
2539 an SCC, too. */
2540 else if (!ipa_edge_within_scc (cs))
2542 tree op = ipa_get_jf_pass_through_operand (jfunc);
2543 Value_Range op_vr (TREE_TYPE (op));
2544 Value_Range op_res (operand_type);
2545 range_op_handler handler (operation);
2547 ipa_range_set_and_normalize (op_vr, op);
2549 if (!handler
2550 || !op_res.supports_type_p (operand_type)
2551 || !handler.fold_range (op_res, operand_type,
2552 src_lats->m_value_range.m_vr, op_vr))
2553 op_res.set_varying (operand_type);
2555 ipa_vr_operation_and_type_effects (vr,
2556 op_res,
2557 NOP_EXPR, param_type,
2558 operand_type);
2560 if (!vr.undefined_p () && !vr.varying_p ())
2562 if (jfunc->m_vr)
2564 Value_Range jvr (param_type);
2565 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2566 NOP_EXPR,
2567 param_type,
2568 jfunc->m_vr->type ()))
2569 vr.intersect (jvr);
2571 return dest_lat->meet_with (vr);
2574 else if (jfunc->type == IPA_JF_CONST)
2576 tree val = ipa_get_jf_constant (jfunc);
2577 if (TREE_CODE (val) == INTEGER_CST)
2579 val = fold_convert (param_type, val);
2580 if (TREE_OVERFLOW_P (val))
2581 val = drop_tree_overflow (val);
2583 Value_Range tmpvr (val, val);
2584 return dest_lat->meet_with (tmpvr);
2588 Value_Range vr (param_type);
2589 if (jfunc->m_vr
2590 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2591 param_type,
2592 jfunc->m_vr->type ()))
2593 return dest_lat->meet_with (vr);
2594 else
2595 return dest_lat->set_to_bottom ();
2598 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2599 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2600 other cases, return false). If there are no aggregate items, set
2601 aggs_by_ref to NEW_AGGS_BY_REF. */
2603 static bool
2604 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2605 bool new_aggs_by_ref)
2607 if (dest_plats->aggs)
2609 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2611 set_agg_lats_to_bottom (dest_plats);
2612 return true;
2615 else
2616 dest_plats->aggs_by_ref = new_aggs_by_ref;
2617 return false;
2620 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2621 already existing lattice for the given OFFSET and SIZE, marking all skipped
2622 lattices as containing variable and checking for overlaps. If there is no
2623 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2624 its offset and size, set its contains_variable flag to PRE_EXISTING, and
2625 return true, unless there are too many lattices already; if there are too
2626 many, return false. If there are overlaps, turn the whole DEST_PLATS to
2627 bottom and return false. If any skipped lattices were newly marked as
2628 containing variable, set *CHANGE to true. MAX_AGG_ITEMS is the maximum number of lattices. */
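/* An illustrative example (hypothetical offsets, not from this file):
   with existing lattices at bit offsets 0 and 64, each of size 32, a
   request for offset 32 and size 32 creates a new lattice between them;
   a request for offset 16 and size 32 overlaps the first lattice, so the
   whole DEST_PLATS is set to bottom and false is returned.  */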
2630 static bool
2631 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2632 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2633 struct ipcp_agg_lattice ***aglat,
2634 bool pre_existing, bool *change, int max_agg_items)
2636 gcc_checking_assert (offset >= 0);
2638 while (**aglat && (**aglat)->offset < offset)
2640 if ((**aglat)->offset + (**aglat)->size > offset)
2642 set_agg_lats_to_bottom (dest_plats);
2643 return false;
2645 *change |= (**aglat)->set_contains_variable ();
2646 *aglat = &(**aglat)->next;
2649 if (**aglat && (**aglat)->offset == offset)
2651 if ((**aglat)->size != val_size)
2653 set_agg_lats_to_bottom (dest_plats);
2654 return false;
2656 gcc_assert (!(**aglat)->next
2657 || (**aglat)->next->offset >= offset + val_size);
2658 return true;
2660 else
2662 struct ipcp_agg_lattice *new_al;
2664 if (**aglat && (**aglat)->offset < offset + val_size)
2666 set_agg_lats_to_bottom (dest_plats);
2667 return false;
2669 if (dest_plats->aggs_count == max_agg_items)
2670 return false;
2671 dest_plats->aggs_count++;
2672 new_al = ipcp_agg_lattice_pool.allocate ();
2674 new_al->offset = offset;
2675 new_al->size = val_size;
2676 new_al->contains_variable = pre_existing;
2678 new_al->next = **aglat;
2679 **aglat = new_al;
2680 return true;
2684 /* Mark AGLAT and all other aggregate lattices reachable via next pointers as
2685 containing an unknown value. */
2687 static bool
2688 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2690 bool ret = false;
2691 while (aglat)
2693 ret |= aglat->set_contains_variable ();
2694 aglat = aglat->next;
2696 return ret;
2699 /* Merge existing aggregate lattices in SRC_PLATS into DEST_PLATS, subtracting
2700 OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
2701 parameter used for lattice value sources. Return true if DEST_PLATS changed
2702 in any way. */
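/* An illustrative example (hypothetical numbers): when an ancestor jump
   function passes &param->field with the field at byte offset 8,
   OFFSET_DELTA is 64 bits, so a caller lattice at bit offset 96 is merged
   into the callee lattice at bit offset 32; caller lattices whose
   resulting offset would be negative are simply skipped.  */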
2704 static bool
2705 merge_aggregate_lattices (struct cgraph_edge *cs,
2706 class ipcp_param_lattices *dest_plats,
2707 class ipcp_param_lattices *src_plats,
2708 int src_idx, HOST_WIDE_INT offset_delta)
2710 bool pre_existing = dest_plats->aggs != NULL;
2711 struct ipcp_agg_lattice **dst_aglat;
2712 bool ret = false;
2714 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2715 return true;
2716 if (src_plats->aggs_bottom)
2717 return set_agg_lats_contain_variable (dest_plats);
2718 if (src_plats->aggs_contain_variable)
2719 ret |= set_agg_lats_contain_variable (dest_plats);
2720 dst_aglat = &dest_plats->aggs;
2722 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2723 param_ipa_max_agg_items);
2724 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2725 src_aglat;
2726 src_aglat = src_aglat->next)
2728 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2730 if (new_offset < 0)
2731 continue;
2732 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2733 &dst_aglat, pre_existing, &ret, max_agg_items))
2735 struct ipcp_agg_lattice *new_al = *dst_aglat;
2737 dst_aglat = &(*dst_aglat)->next;
2738 if (src_aglat->bottom)
2740 ret |= new_al->set_contains_variable ();
2741 continue;
2743 if (src_aglat->contains_variable)
2744 ret |= new_al->set_contains_variable ();
2745 for (ipcp_value<tree> *val = src_aglat->values;
2746 val;
2747 val = val->next)
2748 ret |= new_al->add_value (val->value, cs, val, src_idx,
2749 src_aglat->offset);
2751 else if (dest_plats->aggs_bottom)
2752 return true;
2754 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2755 return ret;
2758 /* Determine whether there is anything to propagate from SRC_PLATS through a
2759 pass-through JFUNC and, if so, whether it conforms to the rules about
2760 propagating values passed by reference. */
2762 static bool
2763 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2764 struct ipa_jump_func *jfunc)
2766 return src_plats->aggs
2767 && (!src_plats->aggs_by_ref
2768 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2771 /* Propagate values through ITEM, jump function for a part of an aggregate,
2772 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2773 associated with the jump function. Return true if AGLAT changed in any
2774 way. */
2776 static bool
2777 propagate_aggregate_lattice (struct cgraph_edge *cs,
2778 struct ipa_agg_jf_item *item,
2779 struct ipcp_agg_lattice *aglat)
2781 class ipa_node_params *caller_info;
2782 class ipcp_param_lattices *src_plats;
2783 struct ipcp_lattice<tree> *src_lat;
2784 HOST_WIDE_INT src_offset;
2785 int src_idx;
2786 tree load_type;
2787 bool ret;
2789 if (item->jftype == IPA_JF_CONST)
2791 tree value = item->value.constant;
2793 gcc_checking_assert (is_gimple_ip_invariant (value));
2794 return aglat->add_value (value, cs, NULL, 0);
2797 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2798 || item->jftype == IPA_JF_LOAD_AGG);
2800 caller_info = ipa_node_params_sum->get (cs->caller);
2801 src_idx = item->value.pass_through.formal_id;
2802 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2804 if (item->jftype == IPA_JF_PASS_THROUGH)
2806 load_type = NULL_TREE;
2807 src_lat = &src_plats->itself;
2808 src_offset = -1;
2810 else
2812 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2813 struct ipcp_agg_lattice *src_aglat;
2815 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2816 if (src_aglat->offset >= load_offset)
2817 break;
2819 load_type = item->value.load_agg.type;
2820 if (!src_aglat
2821 || src_aglat->offset > load_offset
2822 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2823 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2824 return aglat->set_contains_variable ();
2826 src_lat = src_aglat;
2827 src_offset = load_offset;
2830 if (src_lat->bottom
2831 || (!ipcp_versionable_function_p (cs->caller)
2832 && !src_lat->is_single_const ()))
2833 return aglat->set_contains_variable ();
2835 ret = propagate_vals_across_arith_jfunc (cs,
2836 item->value.pass_through.operation,
2837 load_type,
2838 item->value.pass_through.operand,
2839 src_lat, aglat,
2840 src_offset,
2841 src_idx,
2842 item->type);
2844 if (src_lat->contains_variable)
2845 ret |= aglat->set_contains_variable ();
2847 return ret;
2850 /* Propagate aggregate values across jump function JFUNC that is associated
2851 with edge CS and put the values into DEST_PLATS. */
2853 static bool
2854 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2855 struct ipa_jump_func *jfunc,
2856 class ipcp_param_lattices *dest_plats)
2858 bool ret = false;
2860 if (dest_plats->aggs_bottom)
2861 return false;
2863 if (jfunc->type == IPA_JF_PASS_THROUGH
2864 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2866 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2867 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2868 class ipcp_param_lattices *src_plats;
2870 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2871 if (agg_pass_through_permissible_p (src_plats, jfunc))
2873 /* Currently we do not produce clobber aggregate jump
2874 functions, replace with merging when we do. */
2875 gcc_assert (!jfunc->agg.items);
2876 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2877 src_idx, 0);
2878 return ret;
2881 else if (jfunc->type == IPA_JF_ANCESTOR
2882 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2884 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2885 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2886 class ipcp_param_lattices *src_plats;
2888 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2889 if (src_plats->aggs && src_plats->aggs_by_ref)
2891 /* Currently we do not produce clobber aggregate jump
2892 functions, replace with merging when we do. */
2893 gcc_assert (!jfunc->agg.items);
2894 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2895 ipa_get_jf_ancestor_offset (jfunc));
2897 else if (!src_plats->aggs_by_ref)
2898 ret |= set_agg_lats_to_bottom (dest_plats);
2899 else
2900 ret |= set_agg_lats_contain_variable (dest_plats);
2901 return ret;
2904 if (jfunc->agg.items)
2906 bool pre_existing = dest_plats->aggs != NULL;
2907 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2908 struct ipa_agg_jf_item *item;
2909 int i;
2911 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2912 return true;
2914 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2915 param_ipa_max_agg_items);
2916 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2918 HOST_WIDE_INT val_size;
2920 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2921 continue;
2922 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2924 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2925 &aglat, pre_existing, &ret, max_agg_items))
2927 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2928 aglat = &(*aglat)->next;
2930 else if (dest_plats->aggs_bottom)
2931 return true;
2934 ret |= set_chain_of_aglats_contains_variable (*aglat);
2936 else
2937 ret |= set_agg_lats_contain_variable (dest_plats);
2939 return ret;
2942 /* Return true if on the way from CS->caller to the final (non-alias and
2943 non-thunk) destination, the call passes through a thunk. */
2945 static bool
2946 call_passes_through_thunk (cgraph_edge *cs)
2948 cgraph_node *alias_or_thunk = cs->callee;
2949 while (alias_or_thunk->alias)
2950 alias_or_thunk = alias_or_thunk->get_alias_target ();
2951 return alias_or_thunk->thunk;
2954 /* Propagate constants from the caller to the callee of CS and return true if
2955 any of the callee's lattices changed as a result. */
2957 static bool
2958 propagate_constants_across_call (struct cgraph_edge *cs)
2960 class ipa_node_params *callee_info;
2961 enum availability availability;
2962 cgraph_node *callee;
2963 class ipa_edge_args *args;
2964 bool ret = false;
2965 int i, args_count, parms_count;
2967 callee = cs->callee->function_symbol (&availability);
2968 if (!callee->definition)
2969 return false;
2970 gcc_checking_assert (callee->has_gimple_body_p ());
2971 callee_info = ipa_node_params_sum->get (callee);
2972 if (!callee_info)
2973 return false;
2975 args = ipa_edge_args_sum->get (cs);
2976 parms_count = ipa_get_param_count (callee_info);
2977 if (parms_count == 0)
2978 return false;
2979 if (!args
2980 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2981 || !opt_for_fn (cs->caller->decl, optimize))
2983 for (i = 0; i < parms_count; i++)
2984 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2985 i));
2986 return ret;
2988 args_count = ipa_get_cs_argument_count (args);
2990 /* If this call goes through a thunk we must not propagate to the first (0th)
2991 parameter. However, we might need to uncover a thunk from below a series
2992 of aliases first. */
2993 if (call_passes_through_thunk (cs))
2995 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2996 0));
2997 i = 1;
2999 else
3000 i = 0;
3002 for (; (i < args_count) && (i < parms_count); i++)
3004 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3005 class ipcp_param_lattices *dest_plats;
3006 tree param_type = ipa_get_type (callee_info, i);
3008 dest_plats = ipa_get_parm_lattices (callee_info, i);
3009 if (availability == AVAIL_INTERPOSABLE)
3010 ret |= set_all_contains_variable (dest_plats);
3011 else
3013 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3014 &dest_plats->itself,
3015 param_type);
3016 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3017 &dest_plats->ctxlat);
3019 |= propagate_bits_across_jump_function (cs, i, jump_func,
3020 &dest_plats->bits_lattice);
3021 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3022 dest_plats);
3023 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3024 ret |= propagate_vr_across_jump_function (cs, jump_func,
3025 dest_plats, param_type);
3026 else
3027 ret |= dest_plats->m_value_range.set_to_bottom ();
3030 for (; i < parms_count; i++)
3031 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3033 return ret;
3036 /* If an indirect edge IE can be turned into a direct one based on constants
3037 in KNOWN_CSTS, polymorphic contexts in KNOWN_CONTEXTS and aggregate values
3038 in AVS, return the destination. Set *SPECULATIVE to true if the returned
3039 target is only a speculative guess. */
3041 static tree
3042 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3043 const vec<tree> &known_csts,
3044 const vec<ipa_polymorphic_call_context> &known_contexts,
3045 const ipa_argagg_value_list &avs,
3046 bool *speculative)
3048 int param_index = ie->indirect_info->param_index;
3049 HOST_WIDE_INT anc_offset;
3050 tree t = NULL;
3051 tree target = NULL;
3053 *speculative = false;
3055 if (param_index == -1)
3056 return NULL_TREE;
3058 if (!ie->indirect_info->polymorphic)
3060 tree t = NULL;
3062 if (ie->indirect_info->agg_contents)
3064 t = NULL;
3065 if ((unsigned) param_index < known_csts.length ()
3066 && known_csts[param_index])
3067 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3068 ie->indirect_info->offset,
3069 ie->indirect_info->by_ref);
3071 if (!t && ie->indirect_info->guaranteed_unmodified)
3072 t = avs.get_value (param_index,
3073 ie->indirect_info->offset / BITS_PER_UNIT,
3074 ie->indirect_info->by_ref);
3076 else if ((unsigned) param_index < known_csts.length ())
3077 t = known_csts[param_index];
3079 if (t
3080 && TREE_CODE (t) == ADDR_EXPR
3081 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3082 return TREE_OPERAND (t, 0);
3083 else
3084 return NULL_TREE;
3087 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3088 return NULL_TREE;
3090 gcc_assert (!ie->indirect_info->agg_contents);
3091 gcc_assert (!ie->indirect_info->by_ref);
3092 anc_offset = ie->indirect_info->offset;
3094 t = NULL;
3096 if ((unsigned) param_index < known_csts.length ()
3097 && known_csts[param_index])
3098 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3099 ie->indirect_info->offset, true);
3101 /* Try to work out the value of the virtual table pointer from replacements
3102 or known aggregate values. */
3103 if (!t)
3104 t = avs.get_value (param_index,
3105 ie->indirect_info->offset / BITS_PER_UNIT,
3106 true);
3108 /* If we found the virtual table pointer, lookup the target. */
3109 if (t)
3111 tree vtable;
3112 unsigned HOST_WIDE_INT offset;
3113 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3115 bool can_refer;
3116 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3117 vtable, offset, &can_refer);
3118 if (can_refer)
3120 if (!target
3121 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3122 || !possible_polymorphic_call_target_p
3123 (ie, cgraph_node::get (target)))
3125 /* Do not speculate builtin_unreachable, it is stupid! */
3126 if (ie->indirect_info->vptr_changed)
3127 return NULL;
3128 target = ipa_impossible_devirt_target (ie, target);
3130 *speculative = ie->indirect_info->vptr_changed;
3131 if (!*speculative)
3132 return target;
3137 /* Do we know the constant value of pointer? */
3138 if (!t && (unsigned) param_index < known_csts.length ())
3139 t = known_csts[param_index];
3141 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3143 ipa_polymorphic_call_context context;
3144 if (known_contexts.length () > (unsigned int) param_index)
3146 context = known_contexts[param_index];
3147 context.offset_by (anc_offset);
3148 if (ie->indirect_info->vptr_changed)
3149 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3150 ie->indirect_info->otr_type);
3151 if (t)
3153 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3154 (t, ie->indirect_info->otr_type, anc_offset);
3155 if (!ctx2.useless_p ())
3156 context.combine_with (ctx2, ie->indirect_info->otr_type);
3159 else if (t)
3161 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3162 anc_offset);
3163 if (ie->indirect_info->vptr_changed)
3164 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3165 ie->indirect_info->otr_type);
3167 else
3168 return NULL_TREE;
3170 vec <cgraph_node *>targets;
3171 bool final;
3173 targets = possible_polymorphic_call_targets
3174 (ie->indirect_info->otr_type,
3175 ie->indirect_info->otr_token,
3176 context, &final);
3177 if (!final || targets.length () > 1)
3179 struct cgraph_node *node;
3180 if (*speculative)
3181 return target;
3182 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3183 || ie->speculative || !ie->maybe_hot_p ())
3184 return NULL;
3185 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3186 ie->indirect_info->otr_token,
3187 context);
3188 if (node)
3190 *speculative = true;
3191 target = node->decl;
3193 else
3194 return NULL;
3196 else
3198 *speculative = false;
3199 if (targets.length () == 1)
3200 target = targets[0]->decl;
3201 else
3202 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3205 if (target && !possible_polymorphic_call_target_p (ie,
3206 cgraph_node::get (target)))
3208 if (*speculative)
3209 return NULL;
3210 target = ipa_impossible_devirt_target (ie, target);
3213 return target;
3216 /* If an indirect edge IE can be turned into a direct one based on data in
3217 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3218 whether the discovered target is only a speculative guess. */
3220 tree
3221 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3222 ipa_call_arg_values *avals,
3223 bool *speculative)
3225 ipa_argagg_value_list avl (avals);
3226 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3227 avals->m_known_contexts,
3228 avl, speculative);
3231 /* Calculate devirtualization time bonus for NODE, assuming we know information
3232 about arguments stored in AVALS. */
3234 static int
3235 devirtualization_time_bonus (struct cgraph_node *node,
3236 ipa_auto_call_arg_values *avals)
3238 struct cgraph_edge *ie;
3239 int res = 0;
3241 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3243 struct cgraph_node *callee;
3244 class ipa_fn_summary *isummary;
3245 enum availability avail;
3246 tree target;
3247 bool speculative;
3249 ipa_argagg_value_list avl (avals);
3250 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3251 avals->m_known_contexts,
3252 avl, &speculative);
3253 if (!target)
3254 continue;
3256 /* Only bare minimum benefit for clearly un-inlineable targets. */
3257 res += 1;
3258 callee = cgraph_node::get (target);
3259 if (!callee || !callee->definition)
3260 continue;
3261 callee = callee->function_symbol (&avail);
3262 if (avail < AVAIL_AVAILABLE)
3263 continue;
3264 isummary = ipa_fn_summaries->get (callee);
3265 if (!isummary || !isummary->inlinable)
3266 continue;
3268 int size = ipa_size_summaries->get (callee)->size;
3269 /* FIXME: The values below need re-considering and perhaps also
3270 integrating into the cost metrics, at least in some very basic way. */
3271 int max_inline_insns_auto
3272 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3273 if (size <= max_inline_insns_auto / 4)
3274 res += 31 / ((int)speculative + 1);
3275 else if (size <= max_inline_insns_auto / 2)
3276 res += 15 / ((int)speculative + 1);
3277 else if (size <= max_inline_insns_auto
3278 || DECL_DECLARED_INLINE_P (callee->decl))
3279 res += 7 / ((int)speculative + 1);
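/* Illustrative arithmetic (not from the original source): a
   non-speculative target thus adds 31, 15 or 7 depending on its size,
   while a speculative guess halves each bonus via the integer division
   by (speculative + 1).  */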
3282 return res;
3285 /* Return the time bonus granted because of the hints stored in ESTIMATES. */
3287 static int
3288 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3290 int result = 0;
3291 ipa_hints hints = estimates.hints;
3292 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3293 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3295 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3297 if (hints & INLINE_HINT_loop_iterations)
3298 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3300 if (hints & INLINE_HINT_loop_stride)
3301 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3303 return result;
3306 /* If there is a reason to penalize the function described by INFO in the
3307 cloning goodness evaluation, do so. */
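/* A worked example with assumed numbers: with
   param_ipa_cp_recursion_penalty set to 40, a node inside a non-self SCC
   has its evaluation scaled by (100 - 40) / 100, so an evaluation of 1000
   drops to 600 before being compared against the threshold.  */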
3309 static inline sreal
3310 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3311 sreal evaluation)
3313 if (info->node_within_scc && !info->node_is_self_scc)
3314 evaluation = (evaluation
3315 * (100 - opt_for_fn (node->decl,
3316 param_ipa_cp_recursion_penalty))) / 100;
3318 if (info->node_calling_single_call)
3319 evaluation = (evaluation
3320 * (100 - opt_for_fn (node->decl,
3321 param_ipa_cp_single_call_penalty)))
3322 / 100;
3324 return evaluation;
3327 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3328 and SIZE_COST and the sums of frequencies and profile counts of the edges
3329 incoming to the potential new clone in FREQ_SUM and COUNT_SUM. */
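/* A rough worked example with assumed numbers: with time_benefit 8,
   freq_sum 2 and size_cost 40, the frequency-based branch below computes
   evaluation = 8 * 2 / 40 * 1000 = 400 (before penalties), so cloning is
   deemed worthwhile only if param_ipa_cp_eval_threshold is at most 400.  */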
3331 static bool
3332 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3333 sreal freq_sum, profile_count count_sum,
3334 int size_cost)
3336 if (time_benefit == 0
3337 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3338 || node->optimize_for_size_p ())
3339 return false;
3341 gcc_assert (size_cost > 0);
3343 ipa_node_params *info = ipa_node_params_sum->get (node);
3344 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3345 if (count_sum.nonzero_p ())
3347 gcc_assert (base_count.nonzero_p ());
3348 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3349 sreal evaluation = (time_benefit * factor) / size_cost;
3350 evaluation = incorporate_penalties (node, info, evaluation);
3351 evaluation *= 1000;
3353 if (dump_file && (dump_flags & TDF_DETAILS))
3355 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3356 "size: %i, count_sum: ", time_benefit.to_double (),
3357 size_cost);
3358 count_sum.dump (dump_file);
3359 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3360 info->node_within_scc
3361 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3362 info->node_calling_single_call ? ", single_call" : "",
3363 evaluation.to_double (), eval_threshold);
3366 return evaluation.to_int () >= eval_threshold;
3368 else
3370 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3371 evaluation = incorporate_penalties (node, info, evaluation);
3372 evaluation *= 1000;
3374 if (dump_file && (dump_flags & TDF_DETAILS))
3375 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3376 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3377 "threshold: %i\n",
3378 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3379 info->node_within_scc
3380 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3381 info->node_calling_single_call ? ", single_call" : "",
3382 evaluation.to_double (), eval_threshold);
3384 return evaluation.to_int () >= eval_threshold;
3388 /* Grow vectors in AVALS and fill them with information about values of
3389 parameters that are known to be independent of the context. Only calculate
3390 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3391 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3392 parameters will be stored in it.
3394 TODO: Also grow context independent value range vectors. */
3396 static bool
3397 gather_context_independent_values (class ipa_node_params *info,
3398 ipa_auto_call_arg_values *avals,
3399 bool calculate_aggs,
3400 int *removable_params_cost)
3402 int i, count = ipa_get_param_count (info);
3403 bool ret = false;
3405 avals->m_known_vals.safe_grow_cleared (count, true);
3406 avals->m_known_contexts.safe_grow_cleared (count, true);
3408 if (removable_params_cost)
3409 *removable_params_cost = 0;
3411 for (i = 0; i < count; i++)
3413 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3414 ipcp_lattice<tree> *lat = &plats->itself;
3416 if (lat->is_single_const ())
3418 ipcp_value<tree> *val = lat->values;
3419 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3420 avals->m_known_vals[i] = val->value;
3421 if (removable_params_cost)
3422 *removable_params_cost
3423 += estimate_move_cost (TREE_TYPE (val->value), false);
3424 ret = true;
3426 else if (removable_params_cost
3427 && !ipa_is_param_used (info, i))
3428 *removable_params_cost
3429 += ipa_get_param_move_cost (info, i);
3431 if (!ipa_is_param_used (info, i))
3432 continue;
3434 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3435 /* Do not account a known context as a reason for cloning. We can see
3436 whether it permits devirtualization. */
3437 if (ctxlat->is_single_const ())
3438 avals->m_known_contexts[i] = ctxlat->values->value;
3440 if (calculate_aggs)
3441 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3444 return ret;
3447 /* Perform time and size measurement of NODE with the context given in AVALS,
3448 calculate the benefit compared to the node without specialization and store
3449 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3450 context-independent or unused removable parameters and EST_MOVE_COST, the
3451 estimated movement cost of the considered parameter. */
3453 static void
3454 perform_estimation_of_a_value (cgraph_node *node,
3455 ipa_auto_call_arg_values *avals,
3456 int removable_params_cost, int est_move_cost,
3457 ipcp_value_base *val)
3459 sreal time_benefit;
3460 ipa_call_estimates estimates;
3462 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3464 /* Extern inline functions have no cloning local time benefits because they
3465 will be inlined anyway. The only reason to clone them is if it enables
3466 optimization in any of the functions they call. */
3467 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3468 time_benefit = 0;
3469 else
3470 time_benefit = (estimates.nonspecialized_time - estimates.time)
3471 + (devirtualization_time_bonus (node, avals)
3472 + hint_time_bonus (node, estimates)
3473 + removable_params_cost + est_move_cost);
3475 int size = estimates.size;
3476 gcc_checking_assert (size >= 0);
3477 /* The inliner-heuristics based estimates may think that in certain
3478 contexts some functions do not have any size at all, but we want
3479 all specializations to have at least a tiny cost, not least so as not to
3480 divide by zero. */
3481 if (size == 0)
3482 size = 1;
3484 val->local_time_benefit = time_benefit;
3485 val->local_size_cost = size;
3488 /* Get the overall limit of growth based on the growth-related parameters.
3489 It does not really make sense to mix functions with different overall growth
3490 limits, but it is possible, and if it happens, we do not want to select one
3491 limit at random. */
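/* A worked example with hypothetical values: if orig_overall_size is
   10000, param_ipa_cp_large_unit_insns is below that, and
   param_ipa_cp_unit_growth is 10, the function below returns
   10000 + 10000 * 10 / 100 + 1 = 11001.  */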
3493 static long
3494 get_max_overall_size (cgraph_node *node)
3496 long max_new_size = orig_overall_size;
3497 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3498 if (max_new_size < large_unit)
3499 max_new_size = large_unit;
3500 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3501 max_new_size += max_new_size * unit_growth / 100 + 1;
3502 return max_new_size;
3505 /* Return true if NODE should be cloned just for a parameter removal, possibly
3506 dumping a reason if not. */
3508 static bool
3509 clone_for_param_removal_p (cgraph_node *node)
3511 if (!node->can_change_signature)
3513 if (dump_file && (dump_flags & TDF_DETAILS))
3514 fprintf (dump_file, " Not considering cloning to remove parameters, "
3515 "function cannot change signature.\n");
3516 return false;
3518 if (node->can_be_local_p ())
3520 if (dump_file && (dump_flags & TDF_DETAILS))
3521 fprintf (dump_file, " Not considering cloning to remove parameters, "
3522 "IPA-SRA can do it potentially better.\n");
3523 return false;
3525 return true;
3528 /* Iterate over known values of parameters of NODE and estimate the local
3529 effects in terms of time and size they have. */
3531 static void
3532 estimate_local_effects (struct cgraph_node *node)
3534 ipa_node_params *info = ipa_node_params_sum->get (node);
3535 int count = ipa_get_param_count (info);
3536 bool always_const;
3537 int removable_params_cost;
3539 if (!count || !ipcp_versionable_function_p (node))
3540 return;
3542 if (dump_file && (dump_flags & TDF_DETAILS))
3543 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3545 ipa_auto_call_arg_values avals;
3546 always_const = gather_context_independent_values (info, &avals, true,
3547 &removable_params_cost);
3548 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3549 if (always_const || devirt_bonus
3550 || (removable_params_cost && clone_for_param_removal_p (node)))
3552 struct caller_statistics stats;
3553 ipa_call_estimates estimates;
3555 init_caller_stats (&stats);
3556 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3557 false);
3558 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3559 sreal time = estimates.nonspecialized_time - estimates.time;
3560 time += devirt_bonus;
3561 time += hint_time_bonus (node, estimates);
3562 time += removable_params_cost;
3563 int size = estimates.size - stats.n_calls * removable_params_cost;
3565 if (dump_file)
3566 fprintf (dump_file, " - context independent values, size: %i, "
3567 "time_benefit: %f\n", size, (time).to_double ());
3569 if (size <= 0 || node->local)
3571 info->do_clone_for_all_contexts = true;
3573 if (dump_file)
3574 fprintf (dump_file, " Decided to specialize for all "
3575 "known contexts, code not going to grow.\n");
3577 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3578 stats.count_sum, size))
3580 if (size + overall_size <= get_max_overall_size (node))
3582 info->do_clone_for_all_contexts = true;
3583 overall_size += size;
3585 if (dump_file)
3586 fprintf (dump_file, " Decided to specialize for all "
3587 "known contexts, growth (to %li) deemed "
3588 "beneficial.\n", overall_size);
3590 else if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (dump_file, " Not cloning for all contexts because "
3592 "maximum unit size would be reached with %li.\n",
3593 size + overall_size);
3595 else if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, " Not cloning for all contexts because "
3597 "!good_cloning_opportunity_p.\n");
3601 for (int i = 0; i < count; i++)
3603 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3604 ipcp_lattice<tree> *lat = &plats->itself;
3605 ipcp_value<tree> *val;
3607 if (lat->bottom
3608 || !lat->values
3609 || avals.m_known_vals[i])
3610 continue;
3612 for (val = lat->values; val; val = val->next)
3614 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3615 avals.m_known_vals[i] = val->value;
3617 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3618 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3619 emc, val);
3621 if (dump_file && (dump_flags & TDF_DETAILS))
3623 fprintf (dump_file, " - estimates for value ");
3624 print_ipcp_constant_value (dump_file, val->value);
3625 fprintf (dump_file, " for ");
3626 ipa_dump_param (dump_file, info, i);
3627 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3628 val->local_time_benefit.to_double (),
3629 val->local_size_cost);
3632 avals.m_known_vals[i] = NULL_TREE;
3635 for (int i = 0; i < count; i++)
3637 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3639 if (!plats->virt_call)
3640 continue;
3642 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3643 ipcp_value<ipa_polymorphic_call_context> *val;
3645 if (ctxlat->bottom
3646 || !ctxlat->values
3647 || !avals.m_known_contexts[i].useless_p ())
3648 continue;
3650 for (val = ctxlat->values; val; val = val->next)
3652 avals.m_known_contexts[i] = val->value;
3653 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3654 0, val);
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3658 fprintf (dump_file, " - estimates for polymorphic context ");
3659 print_ipcp_constant_value (dump_file, val->value);
3660 fprintf (dump_file, " for ");
3661 ipa_dump_param (dump_file, info, i);
3662 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3663 val->local_time_benefit.to_double (),
3664 val->local_size_cost);
3667 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3670 unsigned all_ctx_len = avals.m_known_aggs.length ();
3671 auto_vec<ipa_argagg_value, 32> all_ctx;
3672 all_ctx.reserve_exact (all_ctx_len);
3673 all_ctx.splice (avals.m_known_aggs);
3674 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3676 unsigned j = 0;
3677 for (int index = 0; index < count; index++)
3679 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3681 if (plats->aggs_bottom || !plats->aggs)
3682 continue;
3684 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3686 ipcp_value<tree> *val;
3687 if (aglat->bottom || !aglat->values
3688 /* If the following is true, the one value is already part of all
3689 context estimations. */
3690 || (!plats->aggs_contain_variable
3691 && aglat->is_single_const ()))
3692 continue;
3694 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3695 while (j < all_ctx_len
3696 && (all_ctx[j].index < index
3697 || (all_ctx[j].index == index
3698 && all_ctx[j].unit_offset < unit_offset)))
3700 avals.m_known_aggs[j] = all_ctx[j];
3701 j++;
3704 for (unsigned k = j; k < all_ctx_len; k++)
3705 avals.m_known_aggs[k+1] = all_ctx[k];
3707 for (val = aglat->values; val; val = val->next)
3709 avals.m_known_aggs[j].value = val->value;
3710 avals.m_known_aggs[j].unit_offset = unit_offset;
3711 avals.m_known_aggs[j].index = index;
3712 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3713 avals.m_known_aggs[j].killed = false;
3715 perform_estimation_of_a_value (node, &avals,
3716 removable_params_cost, 0, val);
3718 if (dump_file && (dump_flags & TDF_DETAILS))
3720 fprintf (dump_file, " - estimates for value ");
3721 print_ipcp_constant_value (dump_file, val->value);
3722 fprintf (dump_file, " for ");
3723 ipa_dump_param (dump_file, info, index);
3724 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3725 "]: time_benefit: %g, size: %i\n",
3726 plats->aggs_by_ref ? "ref " : "",
3727 aglat->offset,
3728 val->local_time_benefit.to_double (),
3729 val->local_size_cost);
3737 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3738 topological sort of values. */
3740 template <typename valtype>
3741 void
3742 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3744 ipcp_value_source<valtype> *src;
3746 if (cur_val->dfs)
3747 return;
3749 dfs_counter++;
3750 cur_val->dfs = dfs_counter;
3751 cur_val->low_link = dfs_counter;
3753 cur_val->topo_next = stack;
3754 stack = cur_val;
3755 cur_val->on_stack = true;
3757 for (src = cur_val->sources; src; src = src->next)
3758 if (src->val)
3760 if (src->val->dfs == 0)
3762 add_val (src->val);
3763 if (src->val->low_link < cur_val->low_link)
3764 cur_val->low_link = src->val->low_link;
3766 else if (src->val->on_stack
3767 && src->val->dfs < cur_val->low_link)
3768 cur_val->low_link = src->val->dfs;
3771 if (cur_val->dfs == cur_val->low_link)
3773 ipcp_value<valtype> *v, *scc_list = NULL;
3777 v = stack;
3778 stack = v->topo_next;
3779 v->on_stack = false;
3780 v->scc_no = cur_val->dfs;
3782 v->scc_next = scc_list;
3783 scc_list = v;
3785 while (v != cur_val);
3787 cur_val->topo_next = values_topo;
3788 values_topo = cur_val;
3792 /* Add all values in lattices associated with NODE to the topological sort if
3793 they are not there yet. */
3795 static void
3796 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3798 ipa_node_params *info = ipa_node_params_sum->get (node);
3799 int i, count = ipa_get_param_count (info);
3801 for (i = 0; i < count; i++)
3803 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3804 ipcp_lattice<tree> *lat = &plats->itself;
3805 struct ipcp_agg_lattice *aglat;
3807 if (!lat->bottom)
3809 ipcp_value<tree> *val;
3810 for (val = lat->values; val; val = val->next)
3811 topo->constants.add_val (val);
3814 if (!plats->aggs_bottom)
3815 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3816 if (!aglat->bottom)
3818 ipcp_value<tree> *val;
3819 for (val = aglat->values; val; val = val->next)
3820 topo->constants.add_val (val);
3823 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3824 if (!ctxlat->bottom)
3826 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3827 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3828 topo->contexts.add_val (ctxval);
3833 /* One pass of constant propagation along the call graph edges, from callers
3834 to callees (requires topological ordering in TOPO); iterates over strongly
3835 connected components. */
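/* Roughly, for every SCC taken in topological order the scheme below is:

     push all optimized nodes of the SCC onto a work-list stack;
     while the stack is non-empty:
       pop node V;
       for each callee edge of V that stays within the SCC:
         if propagating across the edge changed the callee's lattices,
           re-push the callee;

   Because lattices can only grow towards bottom, this iteration reaches a
   fixpoint; only then are edges leaving the SCC processed.  */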
3837 static void
3838 propagate_constants_topo (class ipa_topo_info *topo)
3840 int i;
3842 for (i = topo->nnodes - 1; i >= 0; i--)
3844 unsigned j;
3845 struct cgraph_node *v, *node = topo->order[i];
3846 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3848 /* First, iteratively propagate within the strongly connected component
3849 until all lattices stabilize. */
3850 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3851 if (v->has_gimple_body_p ())
3853 if (opt_for_fn (v->decl, flag_ipa_cp)
3854 && opt_for_fn (v->decl, optimize))
3855 push_node_to_stack (topo, v);
3856 /* When V is not optimized, we cannot push it to the stack, but
3857 we still need to set all its callees' lattices to bottom. */
3858 else
3860 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3861 propagate_constants_across_call (cs);
3865 v = pop_node_from_stack (topo);
3866 while (v)
3868 struct cgraph_edge *cs;
3869 class ipa_node_params *info = NULL;
3870 bool self_scc = true;
3872 for (cs = v->callees; cs; cs = cs->next_callee)
3873 if (ipa_edge_within_scc (cs))
3875 cgraph_node *callee = cs->callee->function_symbol ();
3877 if (v != callee)
3878 self_scc = false;
3880 if (!info)
3882 info = ipa_node_params_sum->get (v);
3883 info->node_within_scc = true;
3886 if (propagate_constants_across_call (cs))
3887 push_node_to_stack (topo, callee);
3890 if (info)
3891 info->node_is_self_scc = self_scc;
3893 v = pop_node_from_stack (topo);
3896 /* Afterwards, propagate along edges leading out of the SCC, calculate
3897 the local effects of the discovered constants and add all valid values
3898 to their topological sort. */
3899 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3900 if (v->has_gimple_body_p ()
3901 && opt_for_fn (v->decl, flag_ipa_cp)
3902 && opt_for_fn (v->decl, optimize))
3904 struct cgraph_edge *cs;
3906 estimate_local_effects (v);
3907 add_all_node_vals_to_toposort (v, topo);
3908 for (cs = v->callees; cs; cs = cs->next_callee)
3909 if (!ipa_edge_within_scc (cs))
3910 propagate_constants_across_call (cs);
3912 cycle_nodes.release ();
3916 /* Propagate the estimated effects of individual values along the topological
3917 order from the dependent values to those they depend on. */
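/* Put differently, for a source value SRC feeding a value in an SCC S over
   a maybe-hot edge CS, the code below approximately does:

     SRC->prop_time_benefit += time (S) * special_factor * frequency (CS);
     SRC->prop_size_cost    += size (S);

   where time (S) and size (S) sum the local and previously propagated
   benefits and costs over the whole SCC, and special_factor boosts values
   involved in recursion.  */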
3919 template <typename valtype>
3920 void
3921 value_topo_info<valtype>::propagate_effects ()
3923 ipcp_value<valtype> *base;
3924 hash_set<ipcp_value<valtype> *> processed_srcvals;
3926 for (base = values_topo; base; base = base->topo_next)
3928 ipcp_value_source<valtype> *src;
3929 ipcp_value<valtype> *val;
3930 sreal time = 0;
3931 HOST_WIDE_INT size = 0;
3933 for (val = base; val; val = val->scc_next)
3935 time = time + val->local_time_benefit + val->prop_time_benefit;
3936 size = size + val->local_size_cost + val->prop_size_cost;
3939 for (val = base; val; val = val->scc_next)
3941 processed_srcvals.empty ();
3942 for (src = val->sources; src; src = src->next)
3943 if (src->val
3944 && src->cs->maybe_hot_p ())
3946 if (!processed_srcvals.add (src->val))
3948 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3949 if (prop_size < INT_MAX)
3950 src->val->prop_size_cost = prop_size;
3951 else
3952 continue;
3955 int special_factor = 1;
3956 if (val->same_scc (src->val))
3957 special_factor
3958 = opt_for_fn (src->cs->caller->decl,
3959 param_ipa_cp_recursive_freq_factor);
3960 else if (val->self_recursion_generated_p ()
3961 && (src->cs->callee->function_symbol ()
3962 == src->cs->caller))
3964 int max_recur_gen_depth
3965 = opt_for_fn (src->cs->caller->decl,
3966 param_ipa_cp_max_recursive_depth);
3967 special_factor = max_recur_gen_depth
3968 - val->self_recursion_generated_level + 1;
3971 src->val->prop_time_benefit
3972 += time * special_factor * src->cs->sreal_frequency ();
3975 if (size < INT_MAX)
3977 val->prop_time_benefit = time;
3978 val->prop_size_cost = size;
3980 else
3982 val->prop_time_benefit = 0;
3983 val->prop_size_cost = 0;
3989 /* Callback for qsort to sort counts of all edges in descending order. */
3991 static int
3992 compare_edge_profile_counts (const void *a, const void *b)
3994 const profile_count *cnt1 = (const profile_count *) a;
3995 const profile_count *cnt2 = (const profile_count *) b;
3997 if (*cnt1 < *cnt2)
3998 return 1;
3999 if (*cnt1 > *cnt2)
4000 return -1;
4001 return 0;
4005 /* Propagate constants, polymorphic contexts and their effects from the
4006 summaries interprocedurally. */
4008 static void
4009 ipcp_propagate_stage (class ipa_topo_info *topo)
4011 struct cgraph_node *node;
4013 if (dump_file)
4014 fprintf (dump_file, "\n Propagating constants:\n\n");
4016 base_count = profile_count::uninitialized ();
4018 bool compute_count_base = false;
4019 unsigned base_count_pos_percent = 0;
4020 FOR_EACH_DEFINED_FUNCTION (node)
4022 if (node->has_gimple_body_p ()
4023 && opt_for_fn (node->decl, flag_ipa_cp)
4024 && opt_for_fn (node->decl, optimize))
4026 ipa_node_params *info = ipa_node_params_sum->get (node);
4027 determine_versionability (node, info);
4029 unsigned nlattices = ipa_get_param_count (info);
4030 info->lattices.safe_grow_cleared (nlattices, true);
4031 initialize_node_lattices (node);
4033 ipa_size_summary *s = ipa_size_summaries->get (node);
4034 if (node->definition && !node->alias && s != NULL)
4035 overall_size += s->self_size;
4036 if (node->count.ipa ().initialized_p ())
4038 compute_count_base = true;
4039 unsigned pos_percent = opt_for_fn (node->decl,
4040 param_ipa_cp_profile_count_base);
4041 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4045 if (compute_count_base)
4047 auto_vec<profile_count> all_edge_counts;
4048 all_edge_counts.reserve_exact (symtab->edges_count);
4049 FOR_EACH_DEFINED_FUNCTION (node)
4050 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4052 profile_count count = cs->count.ipa ();
4053 if (!count.nonzero_p ())
4054 continue;
4056 enum availability avail;
4057 cgraph_node *tgt
4058 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4059 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4060 if (info && info->versionable)
4061 all_edge_counts.quick_push (count);
4064 if (!all_edge_counts.is_empty ())
4066 gcc_assert (base_count_pos_percent <= 100);
4067 all_edge_counts.qsort (compare_edge_profile_counts);
4069 unsigned base_count_pos
4070 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4071 base_count = all_edge_counts[base_count_pos];
4073 if (dump_file)
4075 fprintf (dump_file, "\nSelected base_count from %u edges at "
4076 "position %u, arriving at: ", all_edge_counts.length (),
4077 base_count_pos);
4078 base_count.dump (dump_file);
4079 fprintf (dump_file, "\n");
4082 else if (dump_file)
4083 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4084 "continuing as if without profile feedback.\n");
4087 orig_overall_size = overall_size;
4089 if (dump_file)
4090 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4092 propagate_constants_topo (topo);
4093 if (flag_checking)
4094 ipcp_verify_propagated_values ();
4095 topo->constants.propagate_effects ();
4096 topo->contexts.propagate_effects ();
4098 if (dump_file)
4100 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4101 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4105 /* Discover newly direct outgoing edges from NODE which is a new clone with
4106 known KNOWN_CSTS and make them direct. */
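/* Note that when the parameter feeding a newly discovered direct call has a
   described number of controlled uses, that number is decremented below, and
   once it drops to zero the ADDR reference created during cloning can be
   removed as well.  */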
4108 static void
4109 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4110 vec<tree> known_csts,
4111 vec<ipa_polymorphic_call_context>
4112 known_contexts,
4113 vec<ipa_argagg_value, va_gc> *aggvals)
4115 struct cgraph_edge *ie, *next_ie;
4116 bool found = false;
4118 for (ie = node->indirect_calls; ie; ie = next_ie)
4120 tree target;
4121 bool speculative;
4123 next_ie = ie->next_callee;
4124 ipa_argagg_value_list avs (aggvals);
4125 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4126 avs, &speculative);
4127 if (target)
4129 bool agg_contents = ie->indirect_info->agg_contents;
4130 bool polymorphic = ie->indirect_info->polymorphic;
4131 int param_index = ie->indirect_info->param_index;
4132 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4133 speculative);
4134 found = true;
4136 if (cs && !agg_contents && !polymorphic)
4138 ipa_node_params *info = ipa_node_params_sum->get (node);
4139 int c = ipa_get_controlled_uses (info, param_index);
4140 if (c != IPA_UNDESCRIBED_USE
4141 && !ipa_get_param_load_dereferenced (info, param_index))
4143 struct ipa_ref *to_del;
4145 c--;
4146 ipa_set_controlled_uses (info, param_index, c);
4147 if (dump_file && (dump_flags & TDF_DETAILS))
4148 fprintf (dump_file, " controlled uses count of param "
4149 "%i bumped down to %i\n", param_index, c);
4150 if (c == 0
4151 && (to_del = node->find_reference (cs->callee, NULL, 0,
4152 IPA_REF_ADDR)))
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4155 fprintf (dump_file, " and even removing its "
4156 "cloning-created reference\n");
4157 to_del->remove_reference ();
4163 /* Turning indirect calls into direct ones will improve the overall summary. */
4164 if (found)
4165 ipa_update_overall_fn_summary (node);
4168 class edge_clone_summary;
4169 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4171 /* Edge clone summary. */
4173 class edge_clone_summary
4175 public:
4176 /* Default constructor. */
4177 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4179 /* Default destructor. */
4180 ~edge_clone_summary ()
4182 if (prev_clone)
4183 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4184 if (next_clone)
4185 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4188 cgraph_edge *prev_clone;
4189 cgraph_edge *next_clone;
4192 class edge_clone_summary_t:
4193 public call_summary <edge_clone_summary *>
4195 public:
4196 edge_clone_summary_t (symbol_table *symtab):
4197 call_summary <edge_clone_summary *> (symtab)
4199 m_initialize_when_cloning = true;
4202 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4203 edge_clone_summary *src_data,
4204 edge_clone_summary *dst_data) final override;
4207 /* Edge duplication hook. */
4209 void
4210 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4211 edge_clone_summary *src_data,
4212 edge_clone_summary *dst_data)
4214 if (src_data->next_clone)
4215 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4216 dst_data->prev_clone = src_edge;
4217 dst_data->next_clone = src_data->next_clone;
4218 src_data->next_clone = dst_edge;
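/* The hook above keeps the per-edge list of clones consistent: DST_EDGE is
   spliced in immediately after SRC_EDGE, so that walking next_clone pointers
   from an original edge visits all edges cloned from it.  */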
4221 /* Return true if CS calls DEST or its clone for all contexts. When
4222 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4223 edges from/to an all-context clone. */
4225 static bool
4226 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4227 bool allow_recursion_to_clone)
4229 enum availability availability;
4230 cgraph_node *callee = cs->callee->function_symbol (&availability);
4232 if (availability <= AVAIL_INTERPOSABLE)
4233 return false;
4234 if (callee == dest)
4235 return true;
4236 if (!allow_recursion_to_clone && cs->caller == callee)
4237 return false;
4239 ipa_node_params *info = ipa_node_params_sum->get (callee);
4240 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4243 /* Return true if edge CS does bring about the value described by SRC to
4244 DEST_VAL of node DEST or its clone for all contexts. */
4246 static bool
4247 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4248 cgraph_node *dest, ipcp_value<tree> *dest_val)
4250 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4252 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4253 || caller_info->node_dead)
4254 return false;
4256 if (!src->val)
4257 return true;
4259 if (caller_info->ipcp_orig_node)
4261 tree t = NULL_TREE;
4262 if (src->offset == -1)
4263 t = caller_info->known_csts[src->index];
4264 else if (ipcp_transformation *ts
4265 = ipcp_get_transformation_summary (cs->caller))
4267 ipa_argagg_value_list avl (ts);
4268 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4270 return (t != NULL_TREE
4271 && values_equal_for_ipcp_p (src->val->value, t));
4273 else
4275 if (src->val == dest_val)
4276 return true;
4278 struct ipcp_agg_lattice *aglat;
4279 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4280 src->index);
4281 if (src->offset == -1)
4282 return (plats->itself.is_single_const ()
4283 && values_equal_for_ipcp_p (src->val->value,
4284 plats->itself.values->value));
4285 else
4287 if (plats->aggs_bottom || plats->aggs_contain_variable)
4288 return false;
4289 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4290 if (aglat->offset == src->offset)
4291 return (aglat->is_single_const ()
4292 && values_equal_for_ipcp_p (src->val->value,
4293 aglat->values->value));
4295 return false;
4299 /* Return true if edge CS does bring about the value described by SRC to
4300 DEST_VAL of node DEST or its clone for all contexts. */
4302 static bool
4303 cgraph_edge_brings_value_p (cgraph_edge *cs,
4304 ipcp_value_source<ipa_polymorphic_call_context> *src,
4305 cgraph_node *dest,
4306 ipcp_value<ipa_polymorphic_call_context> *)
4308 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4310 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4311 || caller_info->node_dead)
4312 return false;
4313 if (!src->val)
4314 return true;
4316 if (caller_info->ipcp_orig_node)
4317 return (caller_info->known_contexts.length () > (unsigned) src->index)
4318 && values_equal_for_ipcp_p (src->val->value,
4319 caller_info->known_contexts[src->index]);
4321 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4322 src->index);
4323 return plats->ctxlat.is_single_const ()
4324 && values_equal_for_ipcp_p (src->val->value,
4325 plats->ctxlat.values->value);
4328 /* Get the next clone in the linked list of clones of an edge. */
4330 static inline struct cgraph_edge *
4331 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4333 edge_clone_summary *s = edge_clone_summaries->get (cs);
4334 return s != NULL ? s->next_clone : NULL;
4337 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4338 of them is viable and hot, return true. In that case, for those that still
4339 hold, add their edge frequency and their number and the cumulative profile
4340 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4341 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4343 template <typename valtype>
4344 static bool
4345 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4346 sreal *freq_sum, int *caller_count,
4347 profile_count *rec_count_sum,
4348 profile_count *nonrec_count_sum)
4350 ipcp_value_source<valtype> *src;
4351 sreal freq = 0;
4352 int count = 0;
4353 profile_count rec_cnt = profile_count::zero ();
4354 profile_count nonrec_cnt = profile_count::zero ();
4355 bool hot = false;
4356 bool non_self_recursive = false;
4358 for (src = val->sources; src; src = src->next)
4360 struct cgraph_edge *cs = src->cs;
4361 while (cs)
4363 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4365 count++;
4366 freq += cs->sreal_frequency ();
4367 hot |= cs->maybe_hot_p ();
4368 if (cs->caller != dest)
4370 non_self_recursive = true;
4371 if (cs->count.ipa ().initialized_p ())
4372 rec_cnt += cs->count.ipa ();
4374 else if (cs->count.ipa ().initialized_p ())
4375 nonrec_cnt += cs->count.ipa ();
4377 cs = get_next_cgraph_edge_clone (cs);
4381 /* If the only edges bringing a value are self-recursive ones, do not bother
4382 evaluating it. */
4383 if (!non_self_recursive)
4384 return false;
4386 *freq_sum = freq;
4387 *caller_count = count;
4388 *rec_count_sum = rec_cnt;
4389 *nonrec_count_sum = nonrec_cnt;
4391 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4393 struct cgraph_edge *cs;
4395 /* A cold non-SCC source edge could trigger hot recursive execution of the
4396 function. Consider the case as hot and rely on the following cost model
4397 computation to select the right one. */
4398 for (cs = dest->callers; cs; cs = cs->next_caller)
4399 if (cs->caller == dest && cs->maybe_hot_p ())
4400 return true;
4403 return hot;
4406 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4407 to let a non-self-recursive caller be the first element. Thus, we can
4408 simplify intersecting operations on values that arrive from all of these
4409 callers, especially when self-recursive calls exist. Return true if
4410 this kind of adjustment is possible. */
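/* For example, given CALLERS = {rec1, A, rec2} where rec1 and rec2 are
   self-recursive edges of NODE and A is not, the vector is rearranged to
   {A, rec1, rec2} so that the value intersection can start from an edge that
   actually brings values in from the outside.  */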
4412 static bool
4413 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4414 cgraph_node *node)
4416 for (unsigned i = 0; i < callers.length (); i++)
4418 cgraph_edge *cs = callers[i];
4420 if (cs->caller != node)
4422 if (i > 0)
4424 callers[i] = callers[0];
4425 callers[0] = cs;
4427 return true;
4430 return false;
4433 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4434 is assumed their number is known and equal to CALLER_COUNT. */
4436 template <typename valtype>
4437 static vec<cgraph_edge *>
4438 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4439 int caller_count)
4441 ipcp_value_source<valtype> *src;
4442 vec<cgraph_edge *> ret;
4444 ret.create (caller_count);
4445 for (src = val->sources; src; src = src->next)
4447 struct cgraph_edge *cs = src->cs;
4448 while (cs)
4450 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4451 ret.quick_push (cs);
4452 cs = get_next_cgraph_edge_clone (cs);
4456 if (caller_count > 1)
4457 adjust_callers_for_value_intersection (ret, dest);
4459 return ret;
4462 /* Construct a replacement map for a known VALUE for a formal parameter PARM_NUM.
4463 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4464 should be set to true when the reference created for the constant should be
4465 a load one and not an address one because the corresponding parameter p is
4466 only used as *p. */
4468 static struct ipa_replace_map *
4469 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4470 bool force_load_ref)
4472 struct ipa_replace_map *replace_map;
4474 replace_map = ggc_alloc<ipa_replace_map> ();
4475 if (dump_file)
4477 fprintf (dump_file, " replacing ");
4478 ipa_dump_param (dump_file, info, parm_num);
4480 fprintf (dump_file, " with const ");
4481 print_generic_expr (dump_file, value);
4483 if (force_load_ref)
4484 fprintf (dump_file, " - forcing load reference\n");
4485 else
4486 fprintf (dump_file, "\n");
4488 replace_map->parm_num = parm_num;
4489 replace_map->new_tree = value;
4490 replace_map->force_load_ref = force_load_ref;
4491 return replace_map;
4494 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4495 one, otherwise it will be referred to as the original node. */
4497 static void
4498 dump_profile_updates (cgraph_node *node, bool spec)
4500 if (spec)
4501 fprintf (dump_file, " setting count of the specialized node %s to ",
4502 node->dump_name ());
4503 else
4504 fprintf (dump_file, " setting count of the original node %s to ",
4505 node->dump_name ());
4507 node->count.dump (dump_file);
4508 fprintf (dump_file, "\n");
4509 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4511 fprintf (dump_file, " edge to %s has count ",
4512 cs->callee->dump_name ());
4513 cs->count.dump (dump_file);
4514 fprintf (dump_file, "\n");
4518 /* With a partial training run we do not want to assume that the original's
4519 count is zero whenever we redirect all executed edges to the clone; simply
4520 drop the profile to a local one in that case. In any case, return the new
4521 value. ORIG_NODE is the original node and its count has not been updated yet. */
4523 profile_count
4524 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4526 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4527 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4528 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4529 remainder = remainder.guessed_local ();
4531 return remainder;
4534 /* Structure to sum counts coming from nodes other than the original node and
4535 its clones. */
4537 struct gather_other_count_struct
4539 cgraph_node *orig;
4540 profile_count other_count;
4543 /* Worker callback of call_for_symbol_thunks_and_aliases summing the
4544 counts that come from non-self-recursive calls. */
4546 static bool
4547 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4549 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4550 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4551 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4552 desc->other_count += cs->count.ipa ();
4553 return false;
4556 /* Structure to help analyze if we need to boost counts of some clones of some
4557 non-recursive edges to match the new callee count. */
4559 struct desc_incoming_count_struct
4561 cgraph_node *orig;
4562 hash_set <cgraph_edge *> *processed_edges;
4563 profile_count count;
4564 unsigned unproc_orig_rec_edges;
4567 /* Go over edges calling NODE and its thunks and gather information about
4568 incoming counts so that we know if we need to make any adjustments. */
4570 static void
4571 analyze_clone_icoming_counts (cgraph_node *node,
4572 desc_incoming_count_struct *desc)
4574 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4575 if (cs->caller->thunk)
4577 analyze_clone_icoming_counts (cs->caller, desc);
4578 continue;
4580 else
4582 if (cs->count.initialized_p ())
4583 desc->count += cs->count.ipa ();
4584 if (!desc->processed_edges->contains (cs)
4585 && cs->caller->clone_of == desc->orig)
4586 desc->unproc_orig_rec_edges++;
4590 /* If caller edge counts of a clone created for a self-recursive arithmetic
4591 jump function must be adjusted because it is coming from the "seed" clone
4592 for the first value and so has been excessively scaled back as if it was not
4593 a recursive call, adjust it so that the incoming counts of NODE match its
4594 count. NODE is the node or its thunk. */
4596 static void
4597 adjust_clone_incoming_counts (cgraph_node *node,
4598 desc_incoming_count_struct *desc)
4600 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4601 if (cs->caller->thunk)
4603 adjust_clone_incoming_counts (cs->caller, desc);
4604 profile_count sum = profile_count::zero ();
4605 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4606 if (e->count.initialized_p ())
4607 sum += e->count.ipa ();
4608 cs->count = cs->count.combine_with_ipa_count (sum);
4610 else if (!desc->processed_edges->contains (cs)
4611 && cs->caller->clone_of == desc->orig)
4613 cs->count += desc->count;
4614 if (dump_file)
4616 fprintf (dump_file, " Adjusted count of an incoming edge of "
4617 "a clone %s -> %s to ", cs->caller->dump_name (),
4618 cs->callee->dump_name ());
4619 cs->count.dump (dump_file);
4620 fprintf (dump_file, "\n");
4625 /* When ORIG_NODE has been cloned for values which have been generated for a
4626 self-recursive call as a result of arithmetic pass-through
4627 jump-functions, adjust its count together with counts of all such clones in
4628 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4630 The function sums those counts of the original node and all its clones
4631 that cannot be attributed to a specific clone because they come from
4632 non-recursive edges. This sum is then evenly divided between the clones and
4633 on top of that each one gets all the counts which can be attributed directly
4634 to it. */
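/* As an illustration with invented numbers: if the IPA count of ORIG_NODE is
   900 and the three nodes in SELF_GEN_CLONES have directly attributable
   counts of 0, 60 and 30, then redist_sum becomes 900 - 90 = 810, each node
   receives 810 / 3 = 270, and the final counts are 270, 330 and 300
   respectively, after which outgoing edge counts are rescaled to match.  */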
4636 static void
4637 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4638 const vec<cgraph_node *> &self_gen_clones)
4640 profile_count redist_sum = orig_node->count.ipa ();
4641 if (!(redist_sum > profile_count::zero ()))
4642 return;
4644 if (dump_file)
4645 fprintf (dump_file, " Updating profile of self recursive clone "
4646 "series\n");
4648 gather_other_count_struct gocs;
4649 gocs.orig = orig_node;
4650 gocs.other_count = profile_count::zero ();
4652 auto_vec <profile_count, 8> other_edges_count;
4653 for (cgraph_node *n : self_gen_clones)
4655 gocs.other_count = profile_count::zero ();
4656 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4657 &gocs, false);
4658 other_edges_count.safe_push (gocs.other_count);
4659 redist_sum -= gocs.other_count;
4662 hash_set<cgraph_edge *> processed_edges;
4663 unsigned i = 0;
4664 for (cgraph_node *n : self_gen_clones)
4666 profile_count orig_count = n->count;
4667 profile_count new_count
4668 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4669 new_count = lenient_count_portion_handling (new_count, orig_node);
4670 n->count = new_count;
4671 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4672 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4674 cs->count = cs->count.apply_scale (new_count, orig_count);
4675 processed_edges.add (cs);
4677 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4678 cs->count = cs->count.apply_scale (new_count, orig_count);
4680 i++;
4683 /* There are still going to be edges to ORIG_NODE that have one or more
4684 clones coming from another node clone in SELF_GEN_CLONES and which we
4685 scaled by the same amount, which means that the total incoming sum of
4686 counts to ORIG_NODE will be too high, scale such edges back. */
4687 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4689 if (cs->callee->ultimate_alias_target () == orig_node)
4691 unsigned den = 0;
4692 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4693 if (e->callee->ultimate_alias_target () == orig_node
4694 && processed_edges.contains (e))
4695 den++;
4696 if (den > 0)
4697 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4698 if (e->callee->ultimate_alias_target () == orig_node
4699 && processed_edges.contains (e))
4700 e->count /= den;
4704 /* Edges from the seeds of the values generated for arithmetic jump-functions
4705 along self-recursive edges are likely to have a fairly low count, and so
4706 edges from them to nodes in the self_gen_clones do not correspond to the
4707 artificially distributed count of the nodes; the total sum of incoming
4708 edges to some clones might therefore be too low. Detect this situation and
4709 correct it. */
4710 for (cgraph_node *n : self_gen_clones)
4712 if (!(n->count.ipa () > profile_count::zero ()))
4713 continue;
4715 desc_incoming_count_struct desc;
4716 desc.orig = orig_node;
4717 desc.processed_edges = &processed_edges;
4718 desc.count = profile_count::zero ();
4719 desc.unproc_orig_rec_edges = 0;
4720 analyze_clone_icoming_counts (n, &desc);
4722 if (n->count.differs_from_p (desc.count))
4724 if (n->count > desc.count
4725 && desc.unproc_orig_rec_edges > 0)
4727 desc.count = n->count - desc.count;
4728 desc.count /= desc.unproc_orig_rec_edges;
4729 adjust_clone_incoming_counts (n, &desc);
4731 else if (dump_file)
4732 fprintf (dump_file,
4733 " Unable to fix up incoming counts for %s.\n",
4734 n->dump_name ());
4738 if (dump_file)
4739 for (cgraph_node *n : self_gen_clones)
4740 dump_profile_updates (n, n != orig_node);
4741 return;
4744 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4745 their profile information to reflect this. This function should not be used
4746 for clones generated for arithmetic pass-through jump functions on a
4747 self-recursive call graph edge, that situation is handled by
4748 update_counts_for_self_gen_clones. */
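/* Three situations are distinguished below: if the incoming count of the new
   node already exceeds the original IPA count, the profile has gone astray
   and the original node is dropped to the global0 category; if
   self-recursive edges carry a nonzero count, the "unexplained" portion of
   the count is divided between the two nodes; otherwise the new node's
   incoming sum is simply subtracted from the original.  */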
4750 static void
4751 update_profiling_info (struct cgraph_node *orig_node,
4752 struct cgraph_node *new_node)
4754 struct caller_statistics stats;
4755 profile_count new_sum;
4756 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4758 if (!(orig_node_count > profile_count::zero ()))
4759 return;
4761 if (dump_file)
4763 fprintf (dump_file, " Updating profile from original count: ");
4764 orig_node_count.dump (dump_file);
4765 fprintf (dump_file, "\n");
4768 init_caller_stats (&stats, new_node);
4769 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4770 false);
4771 new_sum = stats.count_sum;
4773 bool orig_edges_processed = false;
4774 if (new_sum > orig_node_count)
4776 /* TODO: Profile has already gone astray, keep what we have but lower it
4777 to global0 category. */
4778 remainder = orig_node->count.global0 ();
4780 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4781 cs->count = cs->count.global0 ();
4782 for (cgraph_edge *cs = orig_node->indirect_calls;
4784 cs = cs->next_callee)
4785 cs->count = cs->count.global0 ();
4786 orig_edges_processed = true;
4788 else if (stats.rec_count_sum.nonzero_p ())
4790 int new_nonrec_calls = stats.n_nonrec_calls;
4791 /* There are self-recursive edges which are likely to bring in the
4792 majority of calls but which we must divide in between the original and
4793 new node. */
4794 init_caller_stats (&stats, orig_node);
4795 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4796 &stats, false);
4797 int orig_nonrec_calls = stats.n_nonrec_calls;
4798 profile_count orig_nonrec_call_count = stats.count_sum;
4800 if (orig_node->local)
4802 if (!orig_nonrec_call_count.nonzero_p ())
4804 if (dump_file)
4805 fprintf (dump_file, " The original is local and the only "
4806 "incoming edges from non-dead callers with nonzero "
4807 "counts are self-recursive, assuming it is cold.\n");
4808 /* The NEW_NODE count and counts of all its outgoing edges
4809 are still unmodified copies of ORIG_NODE's. Just clear
4810 the latter and bail out. */
4811 profile_count zero;
4812 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4813 zero = profile_count::zero ().guessed_local ();
4814 else
4815 zero = profile_count::adjusted_zero ();
4816 orig_node->count = zero;
4817 for (cgraph_edge *cs = orig_node->callees;
4819 cs = cs->next_callee)
4820 cs->count = zero;
4821 for (cgraph_edge *cs = orig_node->indirect_calls;
4823 cs = cs->next_callee)
4824 cs->count = zero;
4825 return;
4828 else
4830 /* Let's behave as if there was another caller that accounts for all
4831 the calls that were either indirect or from other compilation
4832 units. */
4833 orig_nonrec_calls++;
4834 profile_count pretend_caller_count
4835 = (orig_node_count - new_sum - orig_nonrec_call_count
4836 - stats.rec_count_sum);
4837 orig_nonrec_call_count += pretend_caller_count;
4840 /* Divide all "unexplained" counts roughly proportionally to sums of
4841 counts of non-recursive calls.
4843 We put rather arbitrary limits on how many counts we claim because the
4844 sum of non-self-recursive incoming counts is only a rough guideline
4845 and there are cases (such as mcf) where using it blindly just takes
4846 too many. And if lattices are considered in the opposite order we
4847 could also take too few. */
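/* In other words, NEW_PART below is the share of UNEXP proportional to
   NEW_SUM, clamped to the interval

     [UNEXP * new_nonrec_calls / limit_den,
      UNEXP * (limit_den - 1) / limit_den]

   with limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls).  */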
4848 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4850 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4851 profile_count new_part
4852 = MAX (MIN (unexp.apply_scale (new_sum,
4853 new_sum + orig_nonrec_call_count),
4854 unexp.apply_scale (limit_den - 1, limit_den)),
4855 unexp.apply_scale (new_nonrec_calls, limit_den));
4856 if (dump_file)
4858 fprintf (dump_file, " Claiming ");
4859 new_part.dump (dump_file);
4860 fprintf (dump_file, " of unexplained ");
4861 unexp.dump (dump_file);
4862 fprintf (dump_file, " counts because of self-recursive "
4863 "calls\n");
4865 new_sum += new_part;
4866 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4867 orig_node);
4869 else
4870 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4871 orig_node);
4873 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4874 new_node->count = new_sum;
4875 orig_node->count = remainder;
4877 profile_count orig_new_node_count = orig_node_count;
4878 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4879 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4880 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4881 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4882 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4884 if (!orig_edges_processed)
4886 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4887 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4888 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4889 for (cgraph_edge *cs = orig_node->indirect_calls;
4891 cs = cs->next_callee)
4892 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4895 if (dump_file)
4897 dump_profile_updates (new_node, true);
4898 dump_profile_updates (orig_node, false);
4902 /* Update the respective profile of specialized NEW_NODE and the original
4903 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4904 have been redirected to the specialized version. */
4906 static void
4907 update_specialized_profile (struct cgraph_node *new_node,
4908 struct cgraph_node *orig_node,
4909 profile_count redirected_sum)
4911 struct cgraph_edge *cs;
4912 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
4914 if (dump_file)
4916 fprintf (dump_file, " the sum of counts of redirected edges is ");
4917 redirected_sum.dump (dump_file);
4918 fprintf (dump_file, "\n old ipa count of the original node is ");
4919 orig_node_count.dump (dump_file);
4920 fprintf (dump_file, "\n");
4922 if (!(orig_node_count > profile_count::zero ()))
4923 return;
4925 new_node_count = new_node->count;
4926 new_node->count += redirected_sum;
4927 orig_node->count
4928 = lenient_count_portion_handling (orig_node->count - redirected_sum,
4929 orig_node);
4931 for (cs = new_node->callees; cs; cs = cs->next_callee)
4932 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4934 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4936 profile_count dec = cs->count.apply_scale (redirected_sum,
4937 orig_node_count);
4938 cs->count -= dec;
4941 if (dump_file)
4943 dump_profile_updates (new_node, true);
4944 dump_profile_updates (orig_node, false);
4948 static void adjust_references_in_caller (cgraph_edge *cs,
4949 symtab_node *symbol, int index);
4951 /* Simple structure to pass a symbol and index (with same meaning as parameters
4952 of adjust_references_in_caller) through a void* parameter of a
4953 call_for_symbol_thunks_and_aliases callback. */
4954 struct symbol_and_index_together
4956 symtab_node *symbol;
4957 int index;
4960 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4961 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4962 static bool
4963 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4965 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4966 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4967 if (!cs->caller->thunk)
4968 adjust_references_in_caller (cs, pack->symbol, pack->index);
4969 return false;
4972 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4973 variable which is only dereferenced and which is represented by SYMBOL. See
4974 if we can remove the ADDR reference in callers associated with the call. */
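/* For instance, if the callee only ever loads from a parameter "int *p" and
   a caller passes "&some_var" (names invented for illustration), the
   caller's ADDR reference to some_var can be removed or weakened to a load
   reference, which may in turn let some_var be optimized more
   aggressively.  */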
4976 static void
4977 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4979 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4980 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4981 if (jfunc->type == IPA_JF_CONST)
4983 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4984 cs->lto_stmt_uid,
4985 IPA_REF_ADDR);
4986 if (!to_del)
4987 return;
4988 to_del->remove_reference ();
4989 ipa_zap_jf_refdesc (jfunc);
4990 if (dump_file)
4991 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4992 cs->caller->dump_name (), symbol->dump_name ());
4993 return;
4996 if (jfunc->type != IPA_JF_PASS_THROUGH
4997 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4998 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4999 return;
5001 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5002 cgraph_node *caller = cs->caller;
5003 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5004 /* TODO: This consistency check may be too big and not really
5005 that useful. Consider removing it. */
5006 tree cst;
5007 if (caller_info->ipcp_orig_node)
5008 cst = caller_info->known_csts[fidx];
5009 else
5011 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5012 gcc_assert (lat->is_single_const ());
5013 cst = lat->values->value;
5015 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5016 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5017 == symbol));
5019 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5020 if (cuses == IPA_UNDESCRIBED_USE)
5021 return;
5022 gcc_assert (cuses > 0);
5023 cuses--;
5024 ipa_set_controlled_uses (caller_info, fidx, cuses);
5025 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5026 if (dump_file && (dump_flags & TDF_DETAILS))
5027 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5028 "to %i.\n", fidx, caller->dump_name (), cuses);
5029 if (cuses)
5030 return;
5032 if (caller_info->ipcp_orig_node)
5034 /* Cloning machinery has created a reference here, we need to either
5035 remove it or change it to a read one. */
5036 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5037 if (to_del)
5039 to_del->remove_reference ();
5040 if (dump_file)
5041 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5042 cs->caller->dump_name (), symbol->dump_name ());
5043 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5045 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5046 if (dump_file)
5047 fprintf (dump_file,
5048 " ...and replaced it with LOAD one.\n");
5053 symbol_and_index_together pack;
5054 pack.symbol = symbol;
5055 pack.index = fidx;
5056 if (caller->can_change_signature)
5057 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5058 &pack, true);
5062 /* Return true if we would like to remove a parameter from NODE when cloning it
5063 with KNOWN_CSTS scalar constants. */
5065 static bool
5066 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5068 auto_vec<bool, 16> surviving;
5069 bool filled_vec = false;
5070 ipa_node_params *info = ipa_node_params_sum->get (node);
5071 int i, count = ipa_get_param_count (info);
5073 for (i = 0; i < count; i++)
5075 if (!known_csts[i] && ipa_is_param_used (info, i))
5076 continue;
5078 if (!filled_vec)
5080 clone_info *info = clone_info::get (node);
5081 if (!info || !info->param_adjustments)
5082 return true;
5083 info->param_adjustments->get_surviving_params (&surviving);
5084 filled_vec = true;
5086 if (surviving.length () > (unsigned) i && surviving[i])
5087 return true;
5089 return false;
5092 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5093 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5094 redirect all edges in CALLERS to it. */
5096 static struct cgraph_node *
5097 create_specialized_node (struct cgraph_node *node,
5098 vec<tree> known_csts,
5099 vec<ipa_polymorphic_call_context> known_contexts,
5100 vec<ipa_argagg_value, va_gc> *aggvals,
5101 vec<cgraph_edge *> &callers)
5103 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5104 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5105 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5106 struct cgraph_node *new_node;
5107 int i, count = ipa_get_param_count (info);
5108 clone_info *cinfo = clone_info::get (node);
5109 ipa_param_adjustments *old_adjustments = cinfo
5110 ? cinfo->param_adjustments : NULL;
5111 ipa_param_adjustments *new_adjustments;
5112 gcc_assert (!info->ipcp_orig_node);
5113 gcc_assert (node->can_change_signature
5114 || !old_adjustments);
5116 if (old_adjustments)
5118 /* At the moment all IPA optimizations should use the number of
5119 parameters of the prevailing decl as the m_always_copy_start.
5120 Handling any other value would complicate the code below, so for the
5121 time being let's only assert it is so. */
5122 gcc_assert (old_adjustments->m_always_copy_start == count
5123 || old_adjustments->m_always_copy_start < 0);
5124 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5125 for (i = 0; i < old_adj_count; i++)
5127 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5128 if (!node->can_change_signature
5129 || old_adj->op != IPA_PARAM_OP_COPY
5130 || (!known_csts[old_adj->base_index]
5131 && ipa_is_param_used (info, old_adj->base_index)))
5133 ipa_adjusted_param new_adj = *old_adj;
5135 new_adj.prev_clone_adjustment = true;
5136 new_adj.prev_clone_index = i;
5137 vec_safe_push (new_params, new_adj);
5140 bool skip_return = old_adjustments->m_skip_return;
5141 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5142 ipa_param_adjustments (new_params, count,
5143 skip_return));
5145 else if (node->can_change_signature
5146 && want_remove_some_param_p (node, known_csts))
5148 ipa_adjusted_param adj;
5149 memset (&adj, 0, sizeof (adj));
5150 adj.op = IPA_PARAM_OP_COPY;
5151 for (i = 0; i < count; i++)
5152 if (!known_csts[i] && ipa_is_param_used (info, i))
5154 adj.base_index = i;
5155 adj.prev_clone_index = i;
5156 vec_safe_push (new_params, adj);
5158 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5159 ipa_param_adjustments (new_params, count, false));
5161 else
5162 new_adjustments = NULL;
5164 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5165 for (i = callers.length () - 1; i >= 0; i--)
5167 cgraph_edge *cs = callers[i];
5168 if (cs->caller == node)
5170 self_recursive_calls.safe_push (cs);
5171 callers.unordered_remove (i);
5174 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5175 for (i = 0; i < count; i++)
5177 tree t = known_csts[i];
5178 if (!t)
5179 continue;
5181 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5183 bool load_ref = false;
5184 symtab_node *ref_symbol;
5185 if (TREE_CODE (t) == ADDR_EXPR)
5187 tree base = get_base_address (TREE_OPERAND (t, 0));
5188 if (TREE_CODE (base) == VAR_DECL
5189 && ipa_get_controlled_uses (info, i) == 0
5190 && ipa_get_param_load_dereferenced (info, i)
5191 && (ref_symbol = symtab_node::get (base)))
5193 load_ref = true;
5194 if (node->can_change_signature)
5195 for (cgraph_edge *caller : callers)
5196 adjust_references_in_caller (caller, ref_symbol, i);
5200 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5201 if (replace_map)
5202 vec_safe_push (replace_trees, replace_map);
5205 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5206 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5207 node->decl)));
5208 new_node = node->create_virtual_clone (callers, replace_trees,
5209 new_adjustments, "constprop",
5210 suffix_counter);
5211 suffix_counter++;
5213 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5214 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5216 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5217 /* Cloned edges can disappear during cloning as speculation can be
5218 resolved; check that we have one and that it comes from the last
5219 cloning. */
5220 if (cs && cs->caller == new_node)
5221 cs->redirect_callee_duplicating_thunks (new_node);
5222 /* Any future code that would make more than one clone of an outgoing
5223 edge would confuse this mechanism, so let's check that does not
5224 happen. */
5225 gcc_checking_assert (!cs
5226 || !get_next_cgraph_edge_clone (cs)
5227 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5229 if (have_self_recursive_calls)
5230 new_node->expand_all_artificial_thunks ();
5232 ipa_set_node_agg_value_chain (new_node, aggvals);
5233 for (const ipa_argagg_value &av : aggvals)
5234 new_node->maybe_create_reference (av.value, NULL);
5236 if (dump_file && (dump_flags & TDF_DETAILS))
5238 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5239 if (known_contexts.exists ())
5241 for (i = 0; i < count; i++)
5242 if (!known_contexts[i].useless_p ())
5244 fprintf (dump_file, " known ctx %i is ", i);
5245 known_contexts[i].dump (dump_file);
5248 if (aggvals)
5250 fprintf (dump_file, " Aggregate replacements:");
5251 ipa_argagg_value_list avs (aggvals);
5252 avs.dump (dump_file);
5256 new_info = ipa_node_params_sum->get (new_node);
5257 new_info->ipcp_orig_node = node;
5258 new_node->ipcp_clone = true;
5259 new_info->known_csts = known_csts;
5260 new_info->known_contexts = known_contexts;
5262 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5263 aggvals);
5265 return new_node;
5268 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5269 pass-through function to itself when the cgraph_node involved is not an
5270 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5271 no-operation pass-through. */
5273 static bool
5274 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5275 bool simple = true)
5277 enum availability availability;
5278 if (cs->caller == cs->callee->function_symbol (&availability)
5279 && availability > AVAIL_INTERPOSABLE
5280 && jfunc->type == IPA_JF_PASS_THROUGH
5281 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5282 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5283 && ipa_node_params_sum->get (cs->caller)
5284 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5285 return true;
5286 return false;
5289 /* Return true if JFUNC, which describes a part of an aggregate represented or
5290 pointed to by the i-th parameter of call CS, is a pass-through function to
5291 itself when the cgraph_node involved is not an IPA-CP clone. When
5292 SIMPLE is true, further check if JFUNC is a simple no-operation
5293 pass-through. */
5295 static bool
5296 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5297 const ipa_agg_jf_item *jfunc,
5298 int i, bool simple = true)
5300 enum availability availability;
5301 if (cs->caller == cs->callee->function_symbol (&availability)
5302 && availability > AVAIL_INTERPOSABLE
5303 && jfunc->jftype == IPA_JF_LOAD_AGG
5304 && jfunc->offset == jfunc->value.load_agg.offset
5305 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5306 && jfunc->value.pass_through.formal_id == i
5307 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5308 && ipa_node_params_sum->get (cs->caller)
5309 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5310 return true;
5311 return false;
5314 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5315 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5317 static void
5318 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5319 vec<tree> &known_csts,
5320 const vec<cgraph_edge *> &callers)
5322 ipa_node_params *info = ipa_node_params_sum->get (node);
5323 int i, count = ipa_get_param_count (info);
5325 for (i = 0; i < count; i++)
5327 struct cgraph_edge *cs;
5328 tree newval = NULL_TREE;
5329 int j;
5330 bool first = true;
5331 tree type = ipa_get_type (info, i);
5333 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5334 continue;
5336 FOR_EACH_VEC_ELT (callers, j, cs)
5338 struct ipa_jump_func *jump_func;
5339 tree t;
5341 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5342 if (!args
5343 || i >= ipa_get_cs_argument_count (args)
5344 || (i == 0
5345 && call_passes_through_thunk (cs)))
5347 newval = NULL_TREE;
5348 break;
5350 jump_func = ipa_get_ith_jump_func (args, i);
5352 /* Besides simple pass-through jump function, arithmetic jump
5353 function could also introduce argument-direct-pass-through for
5354 self-feeding recursive call. For example,
5356 fn (int i)
5358 fn (i & 1);
5361 Given that i is 0, recursive propagation via (i & 1) also gets
5362 0. */
5363 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5365 gcc_assert (newval);
5366 t = ipa_get_jf_arith_result (
5367 ipa_get_jf_pass_through_operation (jump_func),
5368 newval,
5369 ipa_get_jf_pass_through_operand (jump_func),
5370 type);
5372 else
5373 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5374 jump_func, type);
5375 if (!t
5376 || (newval
5377 && !values_equal_for_ipcp_p (t, newval))
5378 || (!first && !newval))
5380 newval = NULL_TREE;
5381 break;
5383 else
5384 newval = t;
5385 first = false;
5388 if (newval)
5390 if (dump_file && (dump_flags & TDF_DETAILS))
5392 fprintf (dump_file, " adding an extra known scalar value ");
5393 print_ipcp_constant_value (dump_file, newval);
5394 fprintf (dump_file, " for ");
5395 ipa_dump_param (dump_file, info, i);
5396 fprintf (dump_file, "\n");
5399 known_csts[i] = newval;
5404 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5405 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5406 CALLERS. */
5408 static void
5409 find_more_contexts_for_caller_subset (cgraph_node *node,
5410 vec<ipa_polymorphic_call_context>
5411 *known_contexts,
5412 const vec<cgraph_edge *> &callers)
5414 ipa_node_params *info = ipa_node_params_sum->get (node);
5415 int i, count = ipa_get_param_count (info);
5417 for (i = 0; i < count; i++)
5419 cgraph_edge *cs;
5421 if (ipa_get_poly_ctx_lat (info, i)->bottom
5422 || (known_contexts->exists ()
5423 && !(*known_contexts)[i].useless_p ()))
5424 continue;
5426 ipa_polymorphic_call_context newval;
5427 bool first = true;
5428 int j;
5430 FOR_EACH_VEC_ELT (callers, j, cs)
5432 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5433 if (!args
5434 || i >= ipa_get_cs_argument_count (args))
5435 return;
5436 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5437 ipa_polymorphic_call_context ctx;
5438 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5439 cs, i, jfunc);
5440 if (first)
5442 newval = ctx;
5443 first = false;
5445 else
5446 newval.meet_with (ctx);
5447 if (newval.useless_p ())
5448 break;
5451 if (!newval.useless_p ())
5453 if (dump_file && (dump_flags & TDF_DETAILS))
5455 fprintf (dump_file, " adding an extra known polymorphic "
5456 "context ");
5457 print_ipcp_constant_value (dump_file, newval);
5458 fprintf (dump_file, " for ");
5459 ipa_dump_param (dump_file, info, i);
5460 fprintf (dump_file, "\n");
5463 if (!known_contexts->exists ())
5464 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5465 true);
5466 (*known_contexts)[i] = newval;
5472 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5473 RES. If INTERIM is non-NULL, it contains the current interim state of
5474 collected aggregate values which can be used to compute values passed over
5475 self-recursive edges.
5477 This is basically one iteration of push_agg_values_from_edge over one
5478 parameter, which allows for simpler early returns. */
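/* INTERIM plays a dual role: the caller uses it to skip indices that can no
   longer survive the intersection, while here it supplies the values that a
   self-recursive pass-through jump function would effectively pass back into
   the function.  */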
5480 static void
5481 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5482 vec<ipa_argagg_value> *res,
5483 const ipa_argagg_value_list *interim)
5485 bool agg_values_from_caller = false;
5486 bool agg_jf_preserved = false;
5487 unsigned unit_delta = UINT_MAX;
5488 int src_idx = -1;
5489 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5490 index);
5492 if (jfunc->type == IPA_JF_PASS_THROUGH
5493 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5495 agg_values_from_caller = true;
5496 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5497 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5498 unit_delta = 0;
5500 else if (jfunc->type == IPA_JF_ANCESTOR
5501 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5503 agg_values_from_caller = true;
5504 agg_jf_preserved = true;
5505 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5506 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5509 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5510 if (agg_values_from_caller)
5512 if (caller_info->ipcp_orig_node)
5514 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5515 ipcp_transformation *ts
5516 = ipcp_get_transformation_summary (cs->caller);
5517 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5518 ipcp_param_lattices *orig_plats
5519 = ipa_get_parm_lattices (orig_info, src_idx);
5520 if (ts
5521 && orig_plats->aggs
5522 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5524 ipa_argagg_value_list src (ts);
5525 src.push_adjusted_values (src_idx, index, unit_delta, res);
5526 return;
5529 else
5531 ipcp_param_lattices *src_plats
5532 = ipa_get_parm_lattices (caller_info, src_idx);
5533 if (src_plats->aggs
5534 && !src_plats->aggs_bottom
5535 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5537 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5539 interim->push_adjusted_values (src_idx, index, unit_delta,
5540 res);
5541 return;
5543 if (!src_plats->aggs_contain_variable)
5545 push_agg_values_from_plats (src_plats, index, unit_delta,
5546 res);
5547 return;
5553 if (!jfunc->agg.items)
5554 return;
5555 bool first = true;
5556 unsigned prev_unit_offset = 0;
5557 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5559 tree value, srcvalue;
5560 /* Besides simple pass-through aggregate jump function, arithmetic
5561 aggregate jump function could also bring same aggregate value as
5562 parameter passed-in for self-feeding recursive call. For example,
5564 fn (int *i)
5566 int j = *i & 1;
5567 fn (&j);
5570 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5571 if (interim
5572 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5573 && (srcvalue = interim->get_value (index,
5574 agg_jf.offset / BITS_PER_UNIT)))
5575 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5576 srcvalue,
5577 agg_jf.value.pass_through.operand,
5578 agg_jf.type);
5579 else
5580 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5581 &agg_jf);
5582 if (value)
5584 struct ipa_argagg_value iav;
5585 iav.value = value;
5586 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5587 iav.index = index;
5588 iav.by_ref = jfunc->agg.by_ref;
5589 iav.killed = false;
5591 gcc_assert (first
5592 || iav.unit_offset > prev_unit_offset);
5593 prev_unit_offset = iav.unit_offset;
5594 first = false;
5596 res->safe_push (iav);
5599 return;
5602 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5603 description of ultimate callee of CS or the one it was cloned from (the
5604 summary where lattices are). If INTERIM is non-NULL, it contains the
5605 current interim state of collected aggregate values which can be used to
5606 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5607 is true) and to skip values which clearly will not be part of intersection
5608 with INTERIM. */
5610 static void
5611 push_agg_values_from_edge (struct cgraph_edge *cs,
5612 ipa_node_params *dest_info,
5613 vec<ipa_argagg_value> *res,
5614 const ipa_argagg_value_list *interim,
5615 bool optimize_self_recursion)
5617 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5618 if (!args)
5619 return;
5621 int count = MIN (ipa_get_param_count (dest_info),
5622 ipa_get_cs_argument_count (args));
5624 unsigned interim_index = 0;
5625 for (int index = 0; index < count; index++)
5627 if (interim)
5629 while (interim_index < interim->m_elts.size ()
5630 && interim->m_elts[interim_index].value
5631 && interim->m_elts[interim_index].index < index)
5632 interim_index++;
5633 if (interim_index >= interim->m_elts.size ()
5634 || interim->m_elts[interim_index].index > index)
5635 continue;
5638 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5639 if (!ipa_is_param_used (dest_info, index)
5640 || plats->aggs_bottom)
5641 continue;
5642 push_agg_values_for_index_from_edge (cs, index, res,
5643 optimize_self_recursion ? interim
5644 : NULL);
5649 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5650 from all of them. Return nullptr if there are none. */
5652 static struct vec<ipa_argagg_value, va_gc> *
5653 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5654 const vec<cgraph_edge *> &callers)
5656 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5657 if (dest_info->ipcp_orig_node)
5658 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5660 /* gather_edges_for_value puts a non-recursive call into the first element of
5661 CALLERS if it can. */
5662 auto_vec<ipa_argagg_value, 32> interim;
5663 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5665 unsigned valid_entries = interim.length ();
5666 if (!valid_entries)
5667 return nullptr;
5669 unsigned caller_count = callers.length ();
5670 for (unsigned i = 1; i < caller_count; i++)
5672 auto_vec<ipa_argagg_value, 32> last;
5673 ipa_argagg_value_list avs (&interim);
5674 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5676 valid_entries = intersect_argaggs_with (interim, last);
5677 if (!valid_entries)
5678 return nullptr;
5681 vec<ipa_argagg_value, va_gc> *res = NULL;
5682 vec_safe_reserve_exact (res, valid_entries);
5683 for (const ipa_argagg_value &av : interim)
5684 if (av.value)
5685 res->quick_push (av);
5686 gcc_checking_assert (res->length () == valid_entries);
5687 return res;
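/* For example (hypothetical values, for illustration only): if one caller
   passes an aggregate with known entries { offset 0 -> 1, offset 4 -> 2 }
   for a parameter and another caller passes { offset 0 -> 1, offset 4 -> 3 },
   only { offset 0 -> 1 } survives the intersection performed above, because
   a specialized clone may rely only on values brought by every caller in
   the subset.  */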
5690 /* Determine whether CS also brings all scalar values that NODE is
5691 specialized for. */
5693 static bool
5694 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5695 struct cgraph_node *node)
5697 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5698 int count = ipa_get_param_count (dest_info);
5699 class ipa_node_params *caller_info;
5700 class ipa_edge_args *args;
5701 int i;
5703 caller_info = ipa_node_params_sum->get (cs->caller);
5704 args = ipa_edge_args_sum->get (cs);
5705 for (i = 0; i < count; i++)
5707 struct ipa_jump_func *jump_func;
5708 tree val, t;
5710 val = dest_info->known_csts[i];
5711 if (!val)
5712 continue;
5714 if (i >= ipa_get_cs_argument_count (args))
5715 return false;
5716 jump_func = ipa_get_ith_jump_func (args, i);
5717 t = ipa_value_from_jfunc (caller_info, jump_func,
5718 ipa_get_type (dest_info, i));
5719 if (!t || !values_equal_for_ipcp_p (val, t))
5720 return false;
5722 return true;
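/* For instance (a hypothetical situation, for illustration): if NODE has
   been specialized for known_csts = { 5, NULL }, an edge qualifies when its
   first argument's jump function evaluates to 5 in the caller's context;
   the NULL entry means the second parameter places no requirement on the
   edge.  */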
5725 /* Determine whether CS also brings all aggregate values that NODE is
5726 specialized for. */
5728 static bool
5729 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5730 struct cgraph_node *node)
5732 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5733 if (!ts || vec_safe_is_empty (ts->m_agg_values))
5734 return true;
5736 const ipa_argagg_value_list existing (ts->m_agg_values);
5737 auto_vec<ipa_argagg_value, 32> edge_values;
5738 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5739 gcc_checking_assert (dest_info->ipcp_orig_node);
5740 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5741 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5742 const ipa_argagg_value_list avl (&edge_values);
5743 return avl.superset_of_p (existing);
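/* Continuing with hypothetical values: if the transformation summary of the
   clone requires { param 1, unit offset 0 -> 7 }, an edge whose jump
   functions yield { param 1, unit offset 0 -> 7, unit offset 4 -> 9 } passes
   the superset check above, whereas an edge yielding only the offset-4
   entry does not.  */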
5746 /* Given an original NODE and a VAL for which we have already created a
5747 specialized clone, look whether there are incoming edges that still lead
5748 into the old node but now also bring the requested value and conform to
5749 all other criteria such that they can be redirected to the specialized
5750 node. This function can therefore redirect the final edge in an SCC. */
5752 template <typename valtype>
5753 static void
5754 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5756 ipcp_value_source<valtype> *src;
5757 profile_count redirected_sum = profile_count::zero ();
5759 for (src = val->sources; src; src = src->next)
5761 struct cgraph_edge *cs = src->cs;
5762 while (cs)
5764 if (cgraph_edge_brings_value_p (cs, src, node, val)
5765 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5766 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5768 if (dump_file)
5769 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5770 cs->caller->dump_name (),
5771 val->spec_node->dump_name ());
5773 cs->redirect_callee_duplicating_thunks (val->spec_node);
5774 val->spec_node->expand_all_artificial_thunks ();
5775 if (cs->count.ipa ().initialized_p ())
5776 redirected_sum = redirected_sum + cs->count.ipa ();
5778 cs = get_next_cgraph_edge_clone (cs);
5782 if (redirected_sum.nonzero_p ())
5783 update_specialized_profile (val->spec_node, node, redirected_sum);
5786 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5788 static bool
5789 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5791 ipa_polymorphic_call_context *ctx;
5792 int i;
5794 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5795 if (!ctx->useless_p ())
5796 return true;
5797 return false;
5800 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful
context, otherwise return vNULL. */
5802 static vec<ipa_polymorphic_call_context>
5803 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5805 if (known_contexts_useful_p (known_contexts))
5806 return known_contexts.copy ();
5807 else
5808 return vNULL;
5811 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5812 according to VAL and INDEX. Also store a copy of any useful known
5813 contexts into KNOWN_CONTEXTS. */
5815 static void
5816 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5817 vec<tree> *known_csts,
5818 vec<ipa_polymorphic_call_context> *known_contexts,
5819 ipcp_value<tree> *val, int index)
5821 *known_csts = avals->m_known_vals.copy ();
5822 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5823 (*known_csts)[index] = val->value;
5826 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5827 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5828 INDEX. */
5830 static void
5831 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5832 vec<tree> *known_csts,
5833 vec<ipa_polymorphic_call_context> *known_contexts,
5834 ipcp_value<ipa_polymorphic_call_context> *val,
5835 int index)
5837 *known_csts = avals->m_known_vals.copy ();
5838 *known_contexts = avals->m_known_contexts.copy ();
5839 (*known_contexts)[index] = val->value;
5842 /* Return true if OFFSET indicates this was not an aggregate value or there is
5843 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5844 AGGVALS list. */
5846 DEBUG_FUNCTION bool
5847 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
5848 int index, HOST_WIDE_INT offset, tree value)
5850 if (offset == -1)
5851 return true;
5853 const ipa_argagg_value_list avl (aggvals);
5854 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
5855 return v && values_equal_for_ipcp_p (v, value);
5858 /* Return true iff OFFSET is minus one; the source of a polymorphic context
5859 cannot be an aggregate value. */
5861 DEBUG_FUNCTION bool
5862 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
5863 int , HOST_WIDE_INT offset,
5864 ipa_polymorphic_call_context)
5866 return offset == -1;
5869 /* Decide whether to create a special version of NODE for value VAL of
5870 parameter at the given INDEX. If OFFSET is -1, the value is for the
5871 parameter itself, otherwise it is stored at the given OFFSET of the
5872 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
5873 is a vector which contains clones created for self-recursive calls with an
5874 arithmetic pass-through jump function. */
5876 template <typename valtype>
5877 static bool
5878 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5879 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
5880 vec<cgraph_node *> *self_gen_clones)
5882 int caller_count;
5883 sreal freq_sum;
5884 profile_count count_sum, rec_count_sum;
5885 vec<cgraph_edge *> callers;
5887 if (val->spec_node)
5889 perhaps_add_new_callers (node, val);
5890 return false;
5892 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5894 if (dump_file && (dump_flags & TDF_DETAILS))
5895 fprintf (dump_file, " Ignoring candidate value because "
5896 "maximum unit size would be reached with %li.\n",
5897 val->local_size_cost + overall_size);
5898 return false;
5900 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
5901 &rec_count_sum, &count_sum))
5902 return false;
5904 if (!dbg_cnt (ipa_cp_values))
5905 return false;
5907 if (val->self_recursion_generated_p ())
5909 /* The edge counts in this case might not have been adjusted yet.
5910 Nevertheless, even if they were, it would only be guesswork which we
5911 can just as well do now. The recursive part of the counts can be
5912 derived from the count of the original node anyway. */
5913 if (node->count.ipa ().nonzero_p ())
5915 unsigned dem = self_gen_clones->length () + 1;
5916 rec_count_sum = node->count.ipa () / dem;
5918 else
5919 rec_count_sum = profile_count::zero ();
5922 /* get_info_about_necessary_edges only sums up ipa counts. */
5923 count_sum += rec_count_sum;
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, " - considering value ");
5928 print_ipcp_constant_value (dump_file, val->value);
5929 fprintf (dump_file, " for ");
5930 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
5931 if (offset != -1)
5932 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
5933 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
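  /* Clone only if either the benefits local to NODE itself or the benefits
     estimated for the whole subtree of propagated values justify the size
     cost.  */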
5936 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
5937 freq_sum, count_sum,
5938 val->local_size_cost)
5939 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
5940 freq_sum, count_sum, val->prop_size_cost))
5941 return false;
5943 if (dump_file)
5944 fprintf (dump_file, " Creating a specialized node of %s.\n",
5945 node->dump_name ());
5947 vec<tree> known_csts;
5948 vec<ipa_polymorphic_call_context> known_contexts;
5950 callers = gather_edges_for_value (val, node, caller_count);
5951 if (offset == -1)
5952 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
5953 else
5955 known_csts = avals->m_known_vals.copy ();
5956 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5958 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5959 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5960 vec<ipa_argagg_value, va_gc> *aggvals
5961 = find_aggregate_values_for_callers_subset (node, callers);
5962 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
5963 offset, val->value));
5964 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
5965 aggvals, callers);
5967 if (val->self_recursion_generated_p ())
5968 self_gen_clones->safe_push (val->spec_node);
5969 else
5970 update_profiling_info (node, val->spec_node);
5972 callers.release ();
5973 overall_size += val->local_size_cost;
5974 if (dump_file && (dump_flags & TDF_DETAILS))
5975 fprintf (dump_file, " overall size reached %li\n",
5976 overall_size);
5978 /* TODO: If for some lattice there is only one other known value
5979 left, make a special node for it too. */
5981 return true;
5984 /* Like irange::contains_p(), but convert VAL to the type of R if
5985 necessary. */
5987 static inline bool
5988 ipa_range_contains_p (const vrange &r, tree val)
5990 if (r.undefined_p ())
5991 return false;
5993 tree type = r.type ();
5994 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
5995 return false;
5997 val = fold_convert (type, val);
5998 return r.contains_p (val);
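/* A sketch of the problem the wi::fits_to_tree_p guard above avoids
   (assumed values, for illustration): if R is [0, 100] of a 16-bit short
   type and VAL is the 32-bit constant 70000, folding VAL to short would
   wrap to 4464 and containment would be tested against the wrong number;
   instead we conservatively answer false.  */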
6001 /* Decide whether and what specialized clones of NODE should be created. */
6003 static bool
6004 decide_whether_version_node (struct cgraph_node *node)
6006 ipa_node_params *info = ipa_node_params_sum->get (node);
6007 int i, count = ipa_get_param_count (info);
6008 bool ret = false;
6010 if (count == 0)
6011 return false;
6013 if (dump_file && (dump_flags & TDF_DETAILS))
6014 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6015 node->dump_name ());
6017 auto_vec <cgraph_node *, 9> self_gen_clones;
6018 ipa_auto_call_arg_values avals;
6019 gather_context_independent_values (info, &avals, false, NULL);
6021 for (i = 0; i < count; i++)
6023 if (!ipa_is_param_used (info, i))
6024 continue;
6026 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6027 ipcp_lattice<tree> *lat = &plats->itself;
6028 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6030 if (!lat->bottom
6031 && !avals.m_known_vals[i])
6033 ipcp_value<tree> *val;
6034 for (val = lat->values; val; val = val->next)
6036 /* If some values generated for self-recursive calls with
6037 arithmetic jump functions fall outside of the known
6038 range for the parameter, we can skip them. */
6039 if (TREE_CODE (val->value) == INTEGER_CST
6040 && !plats->m_value_range.bottom_p ()
6041 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6042 val->value))
6044 /* This can also happen if a constant present in the source
6045 code falls outside of the range of the parameter's type, so
6046 we cannot assert. */
6047 if (dump_file && (dump_flags & TDF_DETAILS))
6049 fprintf (dump_file, " - skipping%s value ",
6050 val->self_recursion_generated_p ()
6051 ? " self_recursion_generated" : "");
6052 print_ipcp_constant_value (dump_file, val->value);
6053 fprintf (dump_file, " because it is outside known "
6054 "value range.\n");
6056 continue;
6058 ret |= decide_about_value (node, i, -1, val, &avals,
6059 &self_gen_clones);
6063 if (!plats->aggs_bottom)
6065 struct ipcp_agg_lattice *aglat;
6066 ipcp_value<tree> *val;
6067 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6068 if (!aglat->bottom && aglat->values
6069 /* If the following is false, the one value has been considered
6070 for cloning for all contexts. */
6071 && (plats->aggs_contain_variable
6072 || !aglat->is_single_const ()))
6073 for (val = aglat->values; val; val = val->next)
6074 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6075 &self_gen_clones);
6078 if (!ctxlat->bottom
6079 && avals.m_known_contexts[i].useless_p ())
6081 ipcp_value<ipa_polymorphic_call_context> *val;
6082 for (val = ctxlat->values; val; val = val->next)
6083 ret |= decide_about_value (node, i, -1, val, &avals,
6084 &self_gen_clones);
6088 if (!self_gen_clones.is_empty ())
6090 self_gen_clones.safe_push (node);
6091 update_counts_for_self_gen_clones (node, self_gen_clones);
6094 if (info->do_clone_for_all_contexts)
6096 if (!dbg_cnt (ipa_cp_values))
6098 info->do_clone_for_all_contexts = false;
6099 return ret;
6102 struct cgraph_node *clone;
6103 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6105 for (int i = callers.length () - 1; i >= 0; i--)
6107 cgraph_edge *cs = callers[i];
6108 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6110 if (caller_info && caller_info->node_dead)
6111 callers.unordered_remove (i);
6114 if (!adjust_callers_for_value_intersection (callers, node))
6116 /* If the node is not called by anyone, or all its caller edges are
6117 self-recursive, the node is not really in use, so there is no need
6118 to do any cloning. */
6119 info->do_clone_for_all_contexts = false;
6120 return ret;
6123 if (dump_file)
6124 fprintf (dump_file, " - Creating a specialized node of %s "
6125 "for all known contexts.\n", node->dump_name ());
6127 vec<tree> known_csts = avals.m_known_vals.copy ();
6128 vec<ipa_polymorphic_call_context> known_contexts
6129 = copy_useful_known_contexts (avals.m_known_contexts);
6130 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6131 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6132 vec<ipa_argagg_value, va_gc> *aggvals
6133 = find_aggregate_values_for_callers_subset (node, callers);
6135 if (!known_contexts_useful_p (known_contexts))
6137 known_contexts.release ();
6138 known_contexts = vNULL;
6140 clone = create_specialized_node (node, known_csts, known_contexts,
6141 aggvals, callers);
6142 info->do_clone_for_all_contexts = false;
6143 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6144 ret = true;
6147 return ret;
6150 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6152 static void
6153 spread_undeadness (struct cgraph_node *node)
6155 struct cgraph_edge *cs;
6157 for (cs = node->callees; cs; cs = cs->next_callee)
6158 if (ipa_edge_within_scc (cs))
6160 struct cgraph_node *callee;
6161 class ipa_node_params *info;
6163 callee = cs->callee->function_symbol (NULL);
6164 info = ipa_node_params_sum->get (callee);
6166 if (info && info->node_dead)
6168 info->node_dead = 0;
6169 spread_undeadness (callee);
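/* For example (a hypothetical SCC, for illustration): if A, B and C form an
   SCC in which A calls B and B calls C, then once A is found not to be dead,
   the recursion above clears node_dead of B and, from there, of C.  */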
6174 /* Return true if NODE has a caller from outside of its SCC that is not
6175 dead. Worker callback for cgraph_node::call_for_symbol_thunks_and_aliases. */
6177 static bool
6178 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6179 void *data ATTRIBUTE_UNUSED)
6181 struct cgraph_edge *cs;
6183 for (cs = node->callers; cs; cs = cs->next_caller)
6184 if (cs->caller->thunk
6185 && cs->caller->call_for_symbol_thunks_and_aliases
6186 (has_undead_caller_from_outside_scc_p, NULL, true))
6187 return true;
6188 else if (!ipa_edge_within_scc (cs))
6190 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6191 if (!caller_info /* Unoptimized callers are like dead ones. */
6192 || !caller_info->node_dead)
6193 return true;
6195 return false;
6199 /* Identify nodes within the same SCC as NODE which are no longer needed
6200 because of new clones and will be removed as unreachable. */
6202 static void
6203 identify_dead_nodes (struct cgraph_node *node)
6205 struct cgraph_node *v;
6206 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6207 if (v->local)
6209 ipa_node_params *info = ipa_node_params_sum->get (v);
6210 if (info
6211 && !v->call_for_symbol_thunks_and_aliases
6212 (has_undead_caller_from_outside_scc_p, NULL, true))
6213 info->node_dead = 1;
6216 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6218 ipa_node_params *info = ipa_node_params_sum->get (v);
6219 if (info && !info->node_dead)
6220 spread_undeadness (v);
6223 if (dump_file && (dump_flags & TDF_DETAILS))
6225 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6226 if (ipa_node_params_sum->get (v)
6227 && ipa_node_params_sum->get (v)->node_dead)
6228 fprintf (dump_file, " Marking node as dead: %s.\n",
6229 v->dump_name ());
6233 /* The decision stage. Iterate over the topological order of call graph nodes
6234 TOPO and make specialized clones if deemed beneficial. */
6236 static void
6237 ipcp_decision_stage (class ipa_topo_info *topo)
6239 int i;
6241 if (dump_file)
6242 fprintf (dump_file, "\nIPA decision stage:\n\n");
6244 for (i = topo->nnodes - 1; i >= 0; i--)
6246 struct cgraph_node *node = topo->order[i];
6247 bool change = false, iterate = true;
6249 while (iterate)
6251 struct cgraph_node *v;
6252 iterate = false;
6253 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6254 if (v->has_gimple_body_p ()
6255 && ipcp_versionable_function_p (v))
6256 iterate |= decide_whether_version_node (v);
6258 change |= iterate;
6260 if (change)
6261 identify_dead_nodes (node);
6265 /* Look up all VR and bits information that we have discovered and copy it
6266 over to the transformation summary. */
6268 static void
6269 ipcp_store_vr_results (void)
6271 cgraph_node *node;
6273 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6275 ipa_node_params *info = ipa_node_params_sum->get (node);
6276 bool dumped_sth = false;
6277 bool found_useful_result = false;
6278 bool do_vr = true;
6279 bool do_bits = true;
6281 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6283 if (dump_file)
6284 fprintf (dump_file, "Not considering %s for VR discovery "
6285 "and propagate; -fipa-ipa-vrp: disabled.\n",
6286 node->dump_name ());
6287 do_vr = false;
6289 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6291 if (dump_file)
6292 fprintf (dump_file, "Not considering %s for ipa bitwise "
6293 "propagation ; -fipa-bit-cp: disabled.\n",
6294 node->dump_name ());
6295 do_bits = false;
6297 if (!do_bits && !do_vr)
6298 continue;
6300 if (info->ipcp_orig_node)
6301 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6302 if (info->lattices.is_empty ())
6303 /* Newly expanded artificial thunks do not have lattices. */
6304 continue;
6306 unsigned count = ipa_get_param_count (info);
6307 for (unsigned i = 0; i < count; i++)
6309 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6310 if (do_vr
6311 && !plats->m_value_range.bottom_p ()
6312 && !plats->m_value_range.top_p ())
6314 found_useful_result = true;
6315 break;
6317 if (do_bits && plats->bits_lattice.constant_p ())
6319 found_useful_result = true;
6320 break;
6323 if (!found_useful_result)
6324 continue;
6326 ipcp_transformation_initialize ();
6327 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6328 vec_safe_reserve_exact (ts->m_vr, count);
6330 for (unsigned i = 0; i < count; i++)
6332 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6333 ipcp_bits_lattice *bits = NULL;
6335 if (do_bits
6336 && plats->bits_lattice.constant_p ()
6337 && dbg_cnt (ipa_cp_bits))
6338 bits = &plats->bits_lattice;
6340 if (do_vr
6341 && !plats->m_value_range.bottom_p ()
6342 && !plats->m_value_range.top_p ()
6343 && dbg_cnt (ipa_cp_vr))
6345 if (bits)
6347 Value_Range tmp = plats->m_value_range.m_vr;
6348 tree type = ipa_get_type (info, i);
6349 irange &r = as_a<irange> (tmp);
6350 irange_bitmask bm (wide_int::from (bits->get_value (),
6351 TYPE_PRECISION (type),
6352 TYPE_SIGN (type)),
6353 wide_int::from (bits->get_mask (),
6354 TYPE_PRECISION (type),
6355 TYPE_SIGN (type)));
6356 r.update_bitmask (bm);
6357 ipa_vr vr (tmp);
6358 ts->m_vr->quick_push (vr);
6360 else
6362 ipa_vr vr (plats->m_value_range.m_vr);
6363 ts->m_vr->quick_push (vr);
6366 else if (bits)
6368 tree type = ipa_get_type (info, i);
6369 Value_Range tmp;
6370 tmp.set_varying (type);
6371 irange &r = as_a<irange> (tmp);
6372 irange_bitmask bm (wide_int::from (bits->get_value (),
6373 TYPE_PRECISION (type),
6374 TYPE_SIGN (type)),
6375 wide_int::from (bits->get_mask (),
6376 TYPE_PRECISION (type),
6377 TYPE_SIGN (type)));
6378 r.update_bitmask (bm);
6379 ipa_vr vr (tmp);
6380 ts->m_vr->quick_push (vr);
6382 else
6384 ipa_vr vr;
6385 ts->m_vr->quick_push (vr);
6388 if (!dump_file || !bits)
6389 continue;
6391 if (!dumped_sth)
6393 fprintf (dump_file, "Propagated bits info for function %s:\n",
6394 node->dump_name ());
6395 dumped_sth = true;
6397 fprintf (dump_file, " param %i: value = ", i);
6398 print_hex (bits->get_value (), dump_file);
6399 fprintf (dump_file, ", mask = ");
6400 print_hex (bits->get_mask (), dump_file);
6401 fprintf (dump_file, "\n");
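/* A worked example of the value/mask pair dumped above (using the bits
   lattice convention that a 1 bit in the mask means the bit is unknown and
   a 0 bit means it is known to equal the corresponding bit of value): a
   32-bit parameter that is always a multiple of 4 is encoded as
   value = 0x0 and mask = 0xfffffffc, i.e. the two low bits are known to be
   zero and all other bits are unknown.  */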
6406 /* The IPCP driver. */
6408 static unsigned int
6409 ipcp_driver (void)
6411 class ipa_topo_info topo;
6413 if (edge_clone_summaries == NULL)
6414 edge_clone_summaries = new edge_clone_summary_t (symtab);
6416 ipa_check_create_node_params ();
6417 ipa_check_create_edge_args ();
6418 clone_num_suffixes = new hash_map<const char *, unsigned>;
6420 if (dump_file)
6422 fprintf (dump_file, "\nIPA structures before propagation:\n");
6423 if (dump_flags & TDF_DETAILS)
6424 ipa_print_all_params (dump_file);
6425 ipa_print_all_jump_functions (dump_file);
6428 /* Topological sort. */
6429 build_toporder_info (&topo);
6430 /* Do the interprocedural propagation. */
6431 ipcp_propagate_stage (&topo);
6432 /* Decide what constant propagation and cloning should be performed. */
6433 ipcp_decision_stage (&topo);
6434 /* Store results of value range and bits propagation. */
6435 ipcp_store_vr_results ();
6437 /* Free all IPCP structures. */
6438 delete clone_num_suffixes;
6439 free_toporder_info (&topo);
6440 delete edge_clone_summaries;
6441 edge_clone_summaries = NULL;
6442 ipa_free_all_structures_after_ipa_cp ();
6443 if (dump_file)
6444 fprintf (dump_file, "\nIPA constant propagation end\n");
6445 return 0;
6448 /* Initialization and computation of IPCP data structures. This is the initial
6449 intraprocedural analysis of functions, which gathers information to be
6450 propagated later on. */
6452 static void
6453 ipcp_generate_summary (void)
6455 struct cgraph_node *node;
6457 if (dump_file)
6458 fprintf (dump_file, "\nIPA constant propagation start:\n");
6459 ipa_register_cgraph_hooks ();
6461 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6462 ipa_analyze_node (node);
6465 namespace {
6467 const pass_data pass_data_ipa_cp =
6469 IPA_PASS, /* type */
6470 "cp", /* name */
6471 OPTGROUP_NONE, /* optinfo_flags */
6472 TV_IPA_CONSTANT_PROP, /* tv_id */
6473 0, /* properties_required */
6474 0, /* properties_provided */
6475 0, /* properties_destroyed */
6476 0, /* todo_flags_start */
6477 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6480 class pass_ipa_cp : public ipa_opt_pass_d
6482 public:
6483 pass_ipa_cp (gcc::context *ctxt)
6484 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6485 ipcp_generate_summary, /* generate_summary */
6486 NULL, /* write_summary */
6487 NULL, /* read_summary */
6488 ipcp_write_transformation_summaries, /*
6489 write_optimization_summary */
6490 ipcp_read_transformation_summaries, /*
6491 read_optimization_summary */
6492 NULL, /* stmt_fixup */
6493 0, /* function_transform_todo_flags_start */
6494 ipcp_transform_function, /* function_transform */
6495 NULL) /* variable_transform */
6498 /* opt_pass methods: */
6499 bool gate (function *) final override
6501 /* FIXME: We should remove the optimize check after we ensure we never run
6502 IPA passes when not optimizing. */
6503 return (flag_ipa_cp && optimize) || in_lto_p;
6506 unsigned int execute (function *) final override { return ipcp_driver (); }
6508 }; // class pass_ipa_cp
6510 } // anon namespace
6512 ipa_opt_pass_d *
6513 make_pass_ipa_cp (gcc::context *ctxt)
6515 return new pass_ipa_cp (ctxt);
6518 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6519 within the same process. For use by toplev::finalize. */
6521 void
6522 ipa_cp_cc_finalize (void)
6524 base_count = profile_count::uninitialized ();
6525 overall_size = 0;
6526 orig_overall_size = 0;
6527 ipcp_free_transformation_sum ();
6530 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6531 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6532 DECL_UID-sorted vector if available (which is pre-computed only if there are
6533 many parameters). Can return -1 if PARAM is the static chain, which is
6534 not represented among DECL_ARGUMENTS. */
6537 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6539 gcc_assert (TREE_CODE (param) == PARM_DECL);
6540 if (m_uid_to_idx)
6542 unsigned puid = DECL_UID (param);
6543 const ipa_uid_to_idx_map_elt *res
6544 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6545 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6547 return elt.uid < uid;
6549 if (res == m_uid_to_idx->end ()
6550 || res->uid != puid)
6552 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6553 return -1;
6555 return res->index;
6558 unsigned index = 0;
6559 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6560 if (p == param)
6561 return (int) index;
6563 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6564 return -1;
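/* For illustration (hypothetical DECL_UIDs): if FNDECL's parameters have
   uids 1002, 1000 and 1005 in DECL_ARGUMENTS order, m_uid_to_idx sorted by
   uid is { {1000, 1}, {1002, 0}, {1005, 2} }, and the std::lower_bound
   search above maps the parameter with uid 1005 to index 2.  */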
6567 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6568 according to the uid. */
6570 static int
6571 compare_uids (const void *a, const void *b)
6573 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6574 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6575 if (e1->uid < e2->uid)
6576 return -1;
6577 if (e1->uid > e2->uid)
6578 return 1;
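  /* DECL_UIDs are unique, so two distinct parameters can never compare
     equal.  */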
6579 gcc_unreachable ();
6582 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6583 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6584 from parameters to their indices in the DECL_ARGUMENTS chain. */
6586 void
6587 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6589 int c = count_formal_params (fndecl);
6590 if (c < 32)
6591 return;
6593 m_uid_to_idx = NULL;
6594 vec_safe_reserve (m_uid_to_idx, c, true);
6595 unsigned index = 0;
6596 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6598 ipa_uid_to_idx_map_elt elt;
6599 elt.uid = DECL_UID (p);
6600 elt.index = index;
6601 m_uid_to_idx->quick_push (elt);
6603 m_uid_to_idx->qsort (compare_uids);